\documentclass[11pt]{article}
    \usepackage[UTF8]{ctex}
    \setCJKmainfont{NotoSerifCJKsc-Regular}
    \usepackage[breakable]{tcolorbox}
    \usepackage{parskip} % Stop auto-indenting (to mimic markdown behaviour)
    \usepackage{listings}
    \usepackage{fancyhdr}
    \usepackage{fancybox}
    \usepackage{tabularx}

    
    \usepackage{iftex}
    \ifPDFTeX
    	\usepackage[T1]{fontenc}
    	\usepackage{mathpazo}
    \else
    	\usepackage{fontspec}
    \fi

    % Basic figure setup, for now with no caption control since it's done
    % automatically by Pandoc (which extracts ![](path) syntax from Markdown).
    \usepackage{graphicx}
    % Maintain compatibility with old templates. Remove in nbconvert 6.0
    \let\Oldincludegraphics\includegraphics
    % Ensure that by default, figures have no caption (until we provide a
    % proper Figure object with a Caption API and a way to capture that
    % in the conversion process - todo).
    \usepackage{caption}
    \DeclareCaptionFormat{nocaption}{}
    \captionsetup{format=nocaption,aboveskip=0pt,belowskip=0pt}

    \usepackage{float}
    \floatplacement{figure}{H} % forces figures to be placed at the correct location
    \usepackage{xcolor} % Allow colors to be defined
    \usepackage{enumerate} % Needed for markdown enumerations to work
    \usepackage{geometry} % Used to adjust the document margins
    \usepackage{amsmath} % Equations
    \usepackage{amssymb} % Equations
    \usepackage{textcomp} % defines textquotesingle
    % Hack from http://tex.stackexchange.com/a/47451/13684:
    \AtBeginDocument{%
        \def\PYZsq{\textquotesingle}% Upright quotes in Pygmentized code
    }
    \usepackage{upquote} % Upright quotes for verbatim code
    \usepackage{eurosym} % defines \euro
    \usepackage[mathletters]{ucs} % Extended unicode (utf-8) support
    \usepackage{fancyvrb} % verbatim replacement that allows latex
    \usepackage{grffile} % extends the file name processing of package graphics 
                         % to support a larger range
    \makeatletter % fix for old versions of grffile with XeLaTeX
    % \@ifpackagelater picks the first branch when the loaded grffile is dated
    % 2019/11/01 or later, and the second branch otherwise.
    \@ifpackagelater{grffile}{2019/11/01}
    {
      % Do nothing on new versions
    }
    {
      % Older grffile: redefine \Gread@@xetex so that, if a file named
      % "<base>.bb" exists, it is read via \Gread@eps; otherwise fall through
      % to \Gread@@xetex@aux with the original argument.
      % NOTE(review): \Gread@eps and \Gread@@xetex@aux are graphics-internal
      % macros not defined in this file -- confirm against the installed
      % graphics/grffile versions before changing this.
      \def\Gread@@xetex#1{%
        \IfFileExists{"\Gin@base".bb}%
        {\Gread@eps{\Gin@base.bb}}%
        {\Gread@@xetex@aux#1}%
      }
    }
    \makeatother
    \usepackage[Export]{adjustbox} % Used to constrain images to a maximum size
    \adjustboxset{max size={0.8\linewidth}{0.8\paperheight}}

    % The hyperref package gives us a pdf with properly built
    % internal navigation ('pdf bookmarks' for the table of contents,
    % internal cross-reference links, web links for URLs, etc.)
    \usepackage{hyperref}
    % The default LaTeX title has an obnoxious amount of whitespace. By default,
    % titling removes some of it. It also provides customization options.
    \usepackage{titling}
    \usepackage{longtable} % longtable support required by pandoc >1.10
    \usepackage{booktabs}  % table support for pandoc > 1.12.2
    \usepackage[inline]{enumitem} % IRkernel/repr support (it uses the enumerate* environment)
    \usepackage[normalem]{ulem} % ulem is needed to support strikethroughs (\sout)
                                % normalem makes italics be italics, not underlines
    \usepackage{mathrsfs}
    

    
    % Colors for the hyperref package
    \definecolor{urlcolor}{rgb}{0,.145,.698}
    \definecolor{linkcolor}{rgb}{.71,0.21,0.01}
    \definecolor{citecolor}{rgb}{.12,.54,.11}

    % ANSI colors
    \definecolor{ansi-black}{HTML}{3E424D}
    \definecolor{ansi-black-intense}{HTML}{282C36}
    \definecolor{ansi-red}{HTML}{E75C58}
    \definecolor{ansi-red-intense}{HTML}{B22B31}
    \definecolor{ansi-green}{HTML}{00A250}
    \definecolor{ansi-green-intense}{HTML}{007427}
    \definecolor{ansi-yellow}{HTML}{DDB62B}
    \definecolor{ansi-yellow-intense}{HTML}{B27D12}
    \definecolor{ansi-blue}{HTML}{208FFB}
    \definecolor{ansi-blue-intense}{HTML}{0065CA}
    \definecolor{ansi-magenta}{HTML}{D160C4}
    \definecolor{ansi-magenta-intense}{HTML}{A03196}
    \definecolor{ansi-cyan}{HTML}{60C6C8}
    \definecolor{ansi-cyan-intense}{HTML}{258F8F}
    \definecolor{ansi-white}{HTML}{C5C1B4}
    \definecolor{ansi-white-intense}{HTML}{A1A6B2}
    \definecolor{ansi-default-inverse-fg}{HTML}{FFFFFF}
    \definecolor{ansi-default-inverse-bg}{HTML}{000000}

    % common color for the border for error outputs.
    \definecolor{outerrorbackground}{HTML}{FFDFDF}

    % commands and environments needed by pandoc snippets
    % extracted from the output of `pandoc -s`
    \providecommand{\tightlist}{%
      \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
    \DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
    % Add ',fontsize=\small' for more characters per line
    \newenvironment{Shaded}{}{}
    \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
    \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{{#1}}}
    \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
    \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
    \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
    \newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
    \newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
    \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{{#1}}}}
    \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{{#1}}}
    \newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
    \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{{#1}}}
    \newcommand{\RegionMarkerTok}[1]{{#1}}
    \newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
    \newcommand{\NormalTok}[1]{{#1}}
    
    % Additional commands for more recent versions of Pandoc
    \newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.53,0.00,0.00}{{#1}}}
    \newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
    \newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
    \newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.73,0.40,0.53}{{#1}}}
    \newcommand{\ImportTok}[1]{{#1}}
    \newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.73,0.13,0.13}{\textit{{#1}}}}
    \newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
    \newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
    \newcommand{\VariableTok}[1]{\textcolor[rgb]{0.10,0.09,0.49}{{#1}}}
    \newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
    \newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.40,0.40,0.40}{{#1}}}
    \newcommand{\BuiltInTok}[1]{{#1}}
    \newcommand{\ExtensionTok}[1]{{#1}}
    \newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.74,0.48,0.00}{{#1}}}
    \newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.49,0.56,0.16}{{#1}}}
    \newcommand{\InformationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
    \newcommand{\WarningTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
    
    
    % Define a nice break command that doesn't care if a line doesn't already
    % exist.
    \def\br{\hspace*{\fill} \\* }
    % Math Jax compatibility definitions
    \def\gt{>}
    \def\lt{<}
    \let\Oldtex\TeX
    \let\Oldlatex\LaTeX
    \renewcommand{\TeX}{\textrm{\Oldtex}}
    \renewcommand{\LaTeX}{\textrm{\Oldlatex}}
    % Document parameters
    % Document title
    \title{第 1-3 章《数据分析》上机作业}
    
    
    
    
    
% Pygments definitions
\makeatletter
% Reset the six per-token style hooks to \relax before each token is styled.
% In the style table below, \PY@it is set to \textit, \PY@bf to \textbf,
% \PY@tc to a \textcolor wrapper and \PY@bc to a \fcolorbox wrapper.
% (\PY@ul and \PY@ff are not set by any style visible in this file;
% presumably underline and font family -- TODO confirm.)
\def\PY@reset{\let\PY@it=\relax \let\PY@bf=\relax%
    \let\PY@ul=\relax \let\PY@tc=\relax%
    \let\PY@bc=\relax \let\PY@ff=\relax}
% Run the style macro \PY@tok@<name> for a single token-class name.
\def\PY@tok#1{\csname PY@tok@#1\endcsname}
% Walk a `+'-separated list of token-class names, applying \PY@tok to each.
% The recursion stops when the \relax sentinel appended by \PY is reached.
\def\PY@toks#1+{\ifx\relax#1\empty\else%
    \PY@tok{#1}\expandafter\PY@toks\fi}
% Typeset the text with the hooks nested around it, outermost first:
% \PY@bc, \PY@tc, \PY@ul, \PY@it, \PY@bf, \PY@ff.
\def\PY@do#1{\PY@bc{\PY@tc{\PY@ul{%
    \PY@it{\PY@bf{\PY@ff{#1}}}}}}}
% Entry point used by highlighted code: \PY{<class>[+<class>...]}{<text>}.
% Resets the hooks, applies every listed class, then typesets <text>.
\def\PY#1#2{\PY@reset\PY@toks#1+\relax+\PY@do{#2}}

\@namedef{PY@tok@w}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.73,0.73}{##1}}}
\@namedef{PY@tok@c}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\@namedef{PY@tok@cp}{\def\PY@tc##1{\textcolor[rgb]{0.74,0.48,0.00}{##1}}}
\@namedef{PY@tok@k}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@kp}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@kt}{\def\PY@tc##1{\textcolor[rgb]{0.69,0.00,0.25}{##1}}}
\@namedef{PY@tok@o}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@ow}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\@namedef{PY@tok@nb}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@nf}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\@namedef{PY@tok@nc}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\@namedef{PY@tok@nn}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\@namedef{PY@tok@ne}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.82,0.25,0.23}{##1}}}
\@namedef{PY@tok@nv}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@no}{\def\PY@tc##1{\textcolor[rgb]{0.53,0.00,0.00}{##1}}}
\@namedef{PY@tok@nl}{\def\PY@tc##1{\textcolor[rgb]{0.63,0.63,0.00}{##1}}}
\@namedef{PY@tok@ni}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.60,0.60,0.60}{##1}}}
\@namedef{PY@tok@na}{\def\PY@tc##1{\textcolor[rgb]{0.49,0.56,0.16}{##1}}}
\@namedef{PY@tok@nt}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@nd}{\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\@namedef{PY@tok@s}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@sd}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@si}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}}
\@namedef{PY@tok@se}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.13}{##1}}}
\@namedef{PY@tok@sr}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}}
\@namedef{PY@tok@ss}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@sx}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@m}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@gh}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
\@namedef{PY@tok@gu}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.50,0.00,0.50}{##1}}}
\@namedef{PY@tok@gd}{\def\PY@tc##1{\textcolor[rgb]{0.63,0.00,0.00}{##1}}}
\@namedef{PY@tok@gi}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.63,0.00}{##1}}}
\@namedef{PY@tok@gr}{\def\PY@tc##1{\textcolor[rgb]{1.00,0.00,0.00}{##1}}}
\@namedef{PY@tok@ge}{\let\PY@it=\textit}
\@namedef{PY@tok@gs}{\let\PY@bf=\textbf}
\@namedef{PY@tok@gp}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
\@namedef{PY@tok@go}{\def\PY@tc##1{\textcolor[rgb]{0.53,0.53,0.53}{##1}}}
\@namedef{PY@tok@gt}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.27,0.87}{##1}}}
\@namedef{PY@tok@err}{\def\PY@bc##1{{\setlength{\fboxsep}{-\fboxrule}\fcolorbox[rgb]{1.00,0.00,0.00}{1,1,1}{\strut ##1}}}}
\@namedef{PY@tok@kc}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@kd}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@kn}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@kr}{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@bp}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\@namedef{PY@tok@fm}{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\@namedef{PY@tok@vc}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@vg}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@vi}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@vm}{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\@namedef{PY@tok@sa}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@sb}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@sc}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@dl}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@s2}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@sh}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@s1}{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\@namedef{PY@tok@mb}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@mf}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@mh}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@mi}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@il}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@mo}{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\@namedef{PY@tok@ch}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\@namedef{PY@tok@cm}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\@namedef{PY@tok@cpf}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\@namedef{PY@tok@c1}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\@namedef{PY@tok@cs}{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}

% Escape macros used inside highlighted code.  Each one expands to a literal
% character through \char, so characters that are special to TeX (or to the
% Verbatim commandchars) can be printed as-is.
\def\PYZbs{\char`\\}
\def\PYZus{\char`\_}
\def\PYZob{\char`\{}
\def\PYZcb{\char`\}}
\def\PYZca{\char`\^}
\def\PYZam{\char`\&}
\def\PYZlt{\char`\<}
\def\PYZgt{\char`\>}
\def\PYZsh{\char`\#}
\def\PYZpc{\char`\%}
\def\PYZdl{\char`\$}
\def\PYZhy{\char`\-}
% NOTE: the \AtBeginDocument hook earlier in the preamble redefines \PYZsq to
% \textquotesingle, so the definition below is only in effect until
% \begin{document}.
\def\PYZsq{\char`\'}
\def\PYZdq{\char`\"}
\def\PYZti{\char`\~}
% for compatibility with earlier versions
\def\PYZat{@}
\def\PYZlb{[}
\def\PYZrb{]}
\makeatother


    % For linebreaks inside Verbatim environment from package fancyvrb. 
    \makeatletter
        % Boxes holding the pre-typeset continuation symbol and visible-space
        % glyph; they are filled by \sbox in the \Verbatim redefinition below.
        \newbox\Wrappedcontinuationbox 
        \newbox\Wrappedvisiblespacebox 
        % Glyph shown at the end of a line that was broken at a space.
        \newcommand*\Wrappedvisiblespace {\textcolor{red}{\textvisiblespace}} 
        % Glyph shown at the start of a continuation line: a tiny red hook
        % arrow, set with \llap so it overlaps to the left of its position.
        \newcommand*\Wrappedcontinuationsymbol {\textcolor{red}{\llap{\tiny$\m@th\hookrightarrow$}}} 
        % Indentation applied to continuation lines.
        \newcommand*\Wrappedcontinuationindent {3ex } 
        % Material inserted after every break: the indent kern followed by a
        % copy of the continuation-symbol box.
        \newcommand*\Wrappedafterbreak {\kern\Wrappedcontinuationindent\copy\Wrappedcontinuationbox} 
        % Take advantage of the already applied Pygments mark-up to insert
        % potential linebreaks for TeX processing.
        %        {, <, #, %, $, ' and ": go to next line.
        %        _, }, ^, &, >, - and ~: stay at end of broken line.
        % Use of \textquotesingle for straight quote.
        %
        % Each escape macro is redefined as a \discretionary whose three
        % arguments are: text before the break, text after the break, and text
        % when no break occurs.
        %
        % The macros redefined here must be the ones the highlighted code
        % actually calls.  The Pygments definitions in this file and every
        % code cell use the \PYZ.. prefix (\PYZus, \PYZlt, ...), so that prefix
        % is used here.  Redefining \PYGZ.. instead would have no effect, and
        % long code lines would get no breakpoints at these characters.
        \newcommand*\Wrappedbreaksatspecials {%
            \def\PYZus{\discretionary{\char`\_}{\Wrappedafterbreak}{\char`\_}}%
            \def\PYZob{\discretionary{}{\Wrappedafterbreak\char`\{}{\char`\{}}%
            \def\PYZcb{\discretionary{\char`\}}{\Wrappedafterbreak}{\char`\}}}%
            \def\PYZca{\discretionary{\char`\^}{\Wrappedafterbreak}{\char`\^}}%
            \def\PYZam{\discretionary{\char`\&}{\Wrappedafterbreak}{\char`\&}}%
            \def\PYZlt{\discretionary{}{\Wrappedafterbreak\char`\<}{\char`\<}}%
            \def\PYZgt{\discretionary{\char`\>}{\Wrappedafterbreak}{\char`\>}}%
            \def\PYZsh{\discretionary{}{\Wrappedafterbreak\char`\#}{\char`\#}}%
            \def\PYZpc{\discretionary{}{\Wrappedafterbreak\char`\%}{\char`\%}}%
            \def\PYZdl{\discretionary{}{\Wrappedafterbreak\char`\$}{\char`\$}}%
            \def\PYZhy{\discretionary{\char`\-}{\Wrappedafterbreak}{\char`\-}}%
            \def\PYZsq{\discretionary{}{\Wrappedafterbreak\textquotesingle}{\textquotesingle}}%
            \def\PYZdq{\discretionary{}{\Wrappedafterbreak\char`\"}{\char`\"}}%
            \def\PYZti{\discretionary{\char`\~}{\Wrappedafterbreak}{\char`\~}}%
        }
        % Some characters . , ; : ? ! / are not pygmentized.
        % This macro makes them "active" and they will insert potential linebreaks
        %
        % Each definition line uses the \lccode/\lowercase idiom: the lowercase
        % code of `~' is temporarily set to the target character, so that
        % \lowercase turns the active `~' in "\def~" into an active version of
        % that character, which is then defined as a \discretionary that keeps
        % the character at the end of the broken line.
        \newcommand*\Wrappedbreaksatpunct {% 
            \lccode`\~`\.\lowercase{\def~}{\discretionary{\hbox{\char`\.}}{\Wrappedafterbreak}{\hbox{\char`\.}}}% 
            \lccode`\~`\,\lowercase{\def~}{\discretionary{\hbox{\char`\,}}{\Wrappedafterbreak}{\hbox{\char`\,}}}% 
            \lccode`\~`\;\lowercase{\def~}{\discretionary{\hbox{\char`\;}}{\Wrappedafterbreak}{\hbox{\char`\;}}}% 
            \lccode`\~`\:\lowercase{\def~}{\discretionary{\hbox{\char`\:}}{\Wrappedafterbreak}{\hbox{\char`\:}}}% 
            \lccode`\~`\?\lowercase{\def~}{\discretionary{\hbox{\char`\?}}{\Wrappedafterbreak}{\hbox{\char`\?}}}% 
            \lccode`\~`\!\lowercase{\def~}{\discretionary{\hbox{\char`\!}}{\Wrappedafterbreak}{\hbox{\char`\!}}}% 
            \lccode`\~`\/\lowercase{\def~}{\discretionary{\hbox{\char`\/}}{\Wrappedafterbreak}{\hbox{\char`\/}}}% 
            % Switch the seven characters to category "active" so the
            % definitions above are the ones used.
            \catcode`\.\active
            \catcode`\,\active 
            \catcode`\;\active
            \catcode`\:\active
            \catcode`\?\active
            \catcode`\!\active
            \catcode`\/\active 
            % Restore the lowercase code of `~' to itself.
            \lccode`\~`\~ 	
        }
    \makeatother

    % Keep a handle on fancyvrb's own \Verbatim so the wrapper below can
    % delegate to it.
    \let\OriginalVerbatim=\Verbatim
    \makeatletter
    % Line-wrapping replacement for fancyvrb's \Verbatim.
    %   Optional argument: fancyvrb key=value options, forwarded unchanged to
    %   the original \Verbatim together with codes*=\Wrappedbreaksatpunct.
    % The default for the optional argument is empty.  A non-empty default
    % such as the literal "1" would be forwarded as a key whenever
    % \begin{Verbatim} is used without options, and key=value parsing would
    % reject it as an undefined key.  An empty leading entry is ignored.
    \renewcommand{\Verbatim}[1][]{%
        %\parskip\z@skip
        % Pre-typeset the continuation symbol and the visible-space glyph once
        % per environment; they are reused with \copy at every break.
        \sbox\Wrappedcontinuationbox {\Wrappedcontinuationsymbol}%
        \sbox\Wrappedvisiblespacebox {\FV@SetupFont\Wrappedvisiblespace}%
        % Typeset each source line as a ragged-right paragraph of width
        % \linewidth inside a \vtop, with hyphenation penalties zeroed so the
        % discretionary breaks are free.
        \def\FancyVerbFormatLine ##1{\hsize\linewidth
            \vtop{\raggedright\hyphenpenalty\z@\exhyphenpenalty\z@
                \doublehyphendemerits\z@\finalhyphendemerits\z@
                \strut ##1\strut}%
        }%
        % If the linebreak is at a space, the latter will be displayed as visible
        % space at end of first line, and a continuation symbol starts next line.
        % Stretch/shrink are however usually zero for typewriter font.
        \def\FV@Space {%
            \nobreak\hskip\z@ plus\fontdimen3\font minus\fontdimen4\font
            \discretionary{\copy\Wrappedvisiblespacebox}{\Wrappedafterbreak}
            {\kern\fontdimen2\font}%
        }%
        
        % Allow breaks at special characters by redefining the Pygments escape
        % macros (see \Wrappedbreaksatspecials).
        \Wrappedbreaksatspecials
        % Breaks at punctuation characters . , ; : ? ! and / need catcode=\active
        \OriginalVerbatim[#1,codes*=\Wrappedbreaksatpunct]%
    }
    \makeatother

    % Exact colors from NB
    \definecolor{incolor}{HTML}{303F9F}
    \definecolor{outcolor}{HTML}{D84315}
    \definecolor{cellborder}{HTML}{CFCFCF}
    \definecolor{cellbackground}{HTML}{F7F7F7}
    
    % prompt
    \makeatletter
    % Horizontal kern equal to \kvtcb@left@rule plus \kvtcb@boxsep.
    % NOTE(review): these look like tcolorbox-internal lengths (left rule
    % width and box separation) -- confirm against the installed tcolorbox.
    \newcommand{\boxspacing}{\kern\kvtcb@left@rule\kern\kvtcb@boxsep}
    \makeatother
    % \prompt{<label>}{<colour>}{<number>}{<trailing material>}
    %   #1 is accepted but not used in the body.
    %   #2 colour name applied to the prompt.
    %   #3 text printed between square brackets, followed by a colon.
    %   #4 material placed after a 3pt space (cells pass \boxspacing).
    % The prompt is set in typewriter type inside \llap, so it overlaps to the
    % left of the current position; \vspace{-\baselineskip} then moves back up
    % one line so the following content starts level with the prompt.
    \newcommand{\prompt}[4]{
        {\ttfamily\llap{{\color{#2}[#3]:\hspace{3pt}#4}}\vspace{-\baselineskip}}
    }
    

    
    % Prevent overflowing lines due to hard-to-break entities
    \sloppy 
    % Setup hyperref package
    \hypersetup{
      breaklinks=true,  % so long urls are correctly broken across lines
      colorlinks=true,
      urlcolor=urlcolor,
      linkcolor=linkcolor,
      citecolor=citecolor,
      }
    % Slightly bigger margins than the latex defaults
    
    \geometry{verbose,tmargin=1in,bmargin=1in,lmargin=1in,rmargin=1in}
    


\pagestyle{fancy}
\fancyhead[C]{第 1-3 章《数据分析》上机作业}

\begin{document}
    
    % \maketitle

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% - MARK: Box, Table
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\fancypage{\fbox}{}

\begin{table}[h]
\begin{tabularx}{\textwidth}{l|l|l|l|l|l}
    \bfseries{班级\qquad\qquad} & xxx &
    \bfseries{学号\qquad\qquad} & xxx &
    \bfseries{姓名\qquad\qquad} & {xxx \quad\,} \\
    \hline
    \hline
\end{tabularx}
\end{table}

\begin{flushleft}
结合课程上机(R 语言), 完成以下上机作业问题: 

{\bfseries{注 1}} 基本要求: 

1) 针对题目要求给出解答, 给出核心关键代码 (不必粘贴所有源代码); 

2) 简要概述你通过编程解决此问题所遇到的难点及收获的 R 语言或课程理论等方面的心得。 

{\bfseries{注 2}} 评价依据: 

1) 解答的完整性、正确性: 是否缺少内容、是否计算无误;     

2) 难点分析: 是否记录了解决问题过程中的难点和心得;     

3) 核心关键代码是否有恰当的注释;     

4) 雷同抄袭等: 超期作业评价不超过 60 分, 雷同抄袭一律 0 分。     
\end{flushleft}
    
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% - MARK: 1.3
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    \hypertarget{ux4e60ux9898-1.3}{%
\section{习题 1.3}\label{ux4e60ux9898-1.3}}

数据：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{1}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{data} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{read.table}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{./ex\PYZus{}1\PYZus{}3.txt\PYZdq{}}\PY{p}{,} \PY{n}{header}\PY{o}{=}\PY{k+kc}{TRUE}\PY{p}{); }\PY{n}{data}
\end{Verbatim}
\end{tcolorbox}

    A data.frame: 22 × 4
\begin{tabular}{llll}
 Year & Nationwide & Rural & Urban\\
 <int> & <int> & <int> & <int>\\
\hline
	 1978 &  184 &  138 &  405\\
	 1979 &  207 &  158 &  434\\
	 1980 &  236 &  178 &  496\\
     \vdots&\vdots&\vdots&\vdots\\
	 1997 & 2834 & 1876 & 5796\\
	 1998 & 2972 & 1895 & 6217\\
	 1999 & 3180 & 1973 & 6651\\
\end{tabular}


    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{2}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{summary}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]
      Year        Nationwide         Rural            Urban       
 Min.   :1978   Min.   : 184.0   Min.   : 138.0   Min.   : 405.0  
 1st Qu.:1983   1st Qu.: 321.8   1st Qu.: 255.2   1st Qu.: 617.8  
 Median :1988   Median : 727.5   Median : 530.5   Median :1499.5  
 Mean   :1988   Mean   :1117.0   Mean   : 747.9   Mean   :2336.4  
 3rd Qu.:1994   3rd Qu.:1642.2   3rd Qu.:1052.2   3rd Qu.:3675.0  
 Max.   :1999   Max.   :3180.0   Max.   :1973.0   Max.   :6651.0  
    \end{Verbatim}

    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{3}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{attach}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    方便直接使用 Year, Nationwide, Rural, Urban 这几个变量。

    \hypertarget{section}{%
\subsection{(1)}\label{section}}

求均值、方差、标准差、变异系数、偏度、峰度

\textbf{均值}：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{4}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{c}\PY{p}{(}\PY{n+nf}{mean}\PY{p}{(}\PY{n}{Nationwide}\PY{p}{)}\PY{p}{,} \PY{n+nf}{mean}\PY{p}{(}\PY{n}{Rural}\PY{p}{)}\PY{p}{,} \PY{n+nf}{mean}\PY{p}{(}\PY{n}{Urban}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{enumerate*}
\item 1117
\item 747.863636363636
\item 2336.40909090909
\end{enumerate*}


    
    或者，R 提供有一个更方便的函数 \texttt{colMeans}，可以直接计算 data
frame 各列均值 (\texttt{data{[}-1{]}} 排除了第一列 Year)：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{5}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{colMeans}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{description*}
\item[Nationwide] 1117
\item[Rural] 747.863636363636
\item[Urban] 2336.40909090909
\end{description*}


    
    colMeans 函数等价于如下 apply:

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{6}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{apply}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{,} \PY{n}{MARGIN}\PY{o}{=}\PY{l+m}{2}\PY{p}{,} \PY{n}{FUN}\PY{o}{=}\PY{n}{mean}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{description*}
\item[Nationwide] 1117
\item[Rural] 747.863636363636
\item[Urban] 2336.40909090909
\end{description*}


    
    注：\texttt{MARGIN\ =\ 2} 就是取列。

    \textbf{方差}：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{7}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{apply}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{,} \PY{l+m}{2}\PY{p}{,} \PY{n}{var}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{description*}
\item[Nationwide] 1031680.28571429
\item[Rural] 399673.837662338
\item[Urban] 4536136.44372294
\end{description*}


    
    \textbf{标准差}：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{8}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{apply}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{,} \PY{l+m}{2}\PY{p}{,} \PY{n}{sd}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{description*}
\item[Nationwide] 1015.71663652531
\item[Rural] 632.197625479832
\item[Urban] 2129.82075389525
\end{description*}


    
    \textbf{变异系数}：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{9}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{cv} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{x}\PY{p}{)} \PY{n+nf}{sd}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{o}{/}\PY{n+nf}{mean}\PY{p}{(}\PY{n}{x}\PY{p}{)}
\PY{n+nf}{apply}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{,} \PY{l+m}{2}\PY{p}{,} \PY{n}{cv}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{description*}
\item[Nationwide] 0.90932554747118
\item[Rural] 0.845338100076357
\item[Urban] 0.91157869663422
\end{description*}


    
    \textbf{偏度}（skewness）：

    可以调用一个库完成计算\cite{ref8}，也可以照着书上写公式\cite{ref9}：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{10}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c+c1}{\PYZsh{} install.packages(\PYZdq{}psych\PYZdq{})}
\PY{n+nf}{library}\PY{p}{(}\PY{n}{psych}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{11}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c+c1}{\PYZsh{} g1 \PYZlt{}\PYZhy{} function(x) skew(x, type=2)  \PYZsh{} g1、g2 是用 type=2: see help(skew)}
\PY{c+c1}{\PYZsh{} or 按照书上（P6）的手写这个函数}
\PY{n}{g1} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{x}\PY{p}{)} \PY{p}{\PYZob{}}
    \PY{n}{n} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{length}\PY{p}{(}\PY{n}{x}\PY{p}{)}
    \PY{n}{A} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{n} \PY{o}{/} \PY{p}{(}\PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}1}\PY{p}{)} \PY{o}{*} \PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}2}\PY{p}{)}\PY{p}{)}
    \PY{n}{B} \PY{o}{\PYZlt{}\PYZhy{}} \PY{l+m}{1} \PY{o}{/} \PY{n+nf}{sd}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{o}{\PYZca{}}\PY{l+m}{3}
    \PY{n}{S} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{sum}\PY{p}{(}\PY{p}{(}\PY{n}{x} \PY{o}{\PYZhy{}} \PY{n+nf}{mean}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{)}\PY{o}{\PYZca{}}\PY{l+m}{3}\PY{p}{)}
    \PY{n}{A} \PY{o}{*} \PY{n}{B} \PY{o}{*} \PY{n}{S}
\PY{p}{\PYZcb{}}
\PY{n+nf}{apply}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{,} \PY{l+m}{2}\PY{p}{,} \PY{n}{g1}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{description*}
\item[Nationwide] 1.02484718945818
\item[Rural] 1.01256119786818
\item[Urban] 0.970464107448573
\end{description*}


    
    \textbf{峰度}：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{12}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c+c1}{\PYZsh{} g2 \PYZlt{}\PYZhy{} function(x) kurtosi(x, type=2)  \PYZsh{} help(kurtosi)}
\PY{n}{g2} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{x}\PY{p}{)} \PY{p}{\PYZob{}}
    \PY{n}{n} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{length}\PY{p}{(}\PY{n}{x}\PY{p}{)}
    \PY{n}{A} \PY{o}{\PYZlt{}\PYZhy{}} \PY{p}{(}\PY{n}{n} \PY{o}{*} \PY{p}{(}\PY{n}{n}\PY{l+m}{+1}\PY{p}{)}\PY{p}{)} \PY{o}{/} \PY{p}{(}\PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}1}\PY{p}{)} \PY{o}{*} \PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}2}\PY{p}{)} \PY{o}{*} \PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}3}\PY{p}{)}\PY{p}{)}
    \PY{n}{B} \PY{o}{\PYZlt{}\PYZhy{}} \PY{l+m}{1} \PY{o}{/} \PY{n+nf}{sd}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{o}{\PYZca{}}\PY{l+m}{4}
    \PY{n}{S} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{sum}\PY{p}{(}\PY{p}{(}\PY{n}{x} \PY{o}{\PYZhy{}} \PY{n+nf}{mean}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{)}\PY{o}{\PYZca{}}\PY{l+m}{4}\PY{p}{)}
    \PY{n}{C} \PY{o}{\PYZlt{}\PYZhy{}} \PY{p}{(}\PY{l+m}{3} \PY{o}{*} \PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}1}\PY{p}{)}\PY{o}{\PYZca{}}\PY{l+m}{2}\PY{p}{)} \PY{o}{/} \PY{p}{(}\PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}2}\PY{p}{)} \PY{o}{*} \PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}3}\PY{p}{)}\PY{p}{)}
    \PY{n}{A} \PY{o}{*} \PY{n}{B} \PY{o}{*} \PY{n}{S} \PY{o}{\PYZhy{}} \PY{n}{C}
\PY{p}{\PYZcb{}}
\PY{n+nf}{apply}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{,} \PY{l+m}{2}\PY{p}{,} \PY{n}{g2}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{description*}
\item[Nationwide] -0.457241207817378
\item[Rural] -0.451444093083178
\item[Urban] -0.573162098915409
\end{description*}


    
    注：实际上，psych 包提供了一个 \texttt{describe}
函数，可以一次性得到各种常用值：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{13}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{describe}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{,} \PY{n}{type}\PY{o}{=}\PY{l+m}{2}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\resizebox{\textwidth}{15mm}{
\begin{tabular}{r|lllllllllllll}
  & vars & n & mean & sd & median & trimmed & mad & min & max & range & skew & kurtosis & se\\
  & <int> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl>\\
\hline
	Nationwide & 1 & 22 & 1117.0000 & 1015.7166 &  727.5 & 1001.7222 &  673.8417 & 184 & 3180 & 2996 & 1.0248472 & -0.4572412 & 216.5515\\
	Rural & 2 & 22 &  747.8636 &  632.1976 &  530.5 &  682.7222 &  469.9842 & 138 & 1973 & 1835 & 1.0125612 & -0.4514441 & 134.7850\\
	Urban & 3 & 22 & 2336.4091 & 2129.8208 & 1499.5 & 2094.1111 & 1379.5593 & 405 & 6651 & 6246 & 0.9704641 & -0.5731621 & 454.0793\\
\end{tabular}}


    
    \hypertarget{section-1}{%
\subsection{(2)}\label{section-1}}

\textbf{中位数}：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{14}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{apply}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{,} \PY{l+m}{2}\PY{p}{,} \PY{n}{median}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{description*}
\item[Nationwide] 727.5
\item[Rural] 530.5
\item[Urban] 1499.5
\end{description*}


    
    \textbf{四分位距}：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{15}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{apply}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{,} \PY{l+m}{2}\PY{p}{,} \PY{n}{quantile}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    A matrix: 5 × 3 of type dbl
\begin{tabular}{r|lll}
  & Nationwide & Rural & Urban\\
\hline
	0\% &  184.00 &  138.00 &  405.00\\
	25\% &  321.75 &  255.25 &  617.75\\
	50\% &  727.50 &  530.50 & 1499.50\\
	75\% & 1642.25 & 1052.25 & 3675.00\\
	100\% & 3180.00 & 1973.00 & 6651.00\\
\end{tabular}


    
    五数：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{16}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{fn} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{apply}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{,} \PY{l+m}{2}\PY{p}{,} \PY{n}{fivenum}\PY{p}{)}
\PY{n}{fn}
\end{Verbatim}
\end{tcolorbox}

    A matrix: 5 × 3 of type dbl
\begin{tabular}{lll}
 Nationwide & Rural & Urban\\
\hline
	  184.0 &  138.0 &  405.0\\
	  311.0 &  246.0 &  603.0\\
	  727.5 &  530.5 & 1499.5\\
	 1746.0 & 1118.0 & 3891.0\\
	 3180.0 & 1973.0 & 6651.0\\
\end{tabular}


    
    四分位极差：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{17}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{R1} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{Q3}\PY{p}{,} \PY{n}{Q1}\PY{p}{)} \PY{n}{Q3} \PY{o}{\PYZhy{}} \PY{n}{Q1}
\PY{n+nf}{R1}\PY{p}{(}\PY{n}{Q3}\PY{o}{=}\PY{n}{fn}\PY{p}{[}\PY{l+m}{4}\PY{p}{,}\PY{p}{]}\PY{p}{,} \PY{n}{Q1}\PY{o}{=}\PY{n}{fn}\PY{p}{[}\PY{l+m}{2}\PY{p}{,}\PY{p}{]}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{description*}
\item[Nationwide] 1435
\item[Rural] 872
\item[Urban] 3288
\end{description*}


    
    三均值：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{18}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{M3} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{Q1}\PY{p}{,} \PY{n}{M}\PY{p}{,} \PY{n}{Q3}\PY{p}{)} \PY{n}{Q1}\PY{o}{/}\PY{l+m}{4} \PY{o}{+} \PY{n}{M}\PY{o}{/}\PY{l+m}{2} \PY{o}{+} \PY{n}{Q3}\PY{o}{/}\PY{l+m}{4}
\PY{n+nf}{M3}\PY{p}{(}\PY{n}{Q1}\PY{o}{=}\PY{n}{fn}\PY{p}{[}\PY{l+m}{2}\PY{p}{,}\PY{p}{]}\PY{p}{,} \PY{n}{M}\PY{o}{=}\PY{n}{fn}\PY{p}{[}\PY{l+m}{3}\PY{p}{,}\PY{p}{]}\PY{p}{,} \PY{n}{Q3}\PY{o}{=}\PY{n}{fn}\PY{p}{[}\PY{l+m}{4}\PY{p}{,}\PY{p}{]}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{description*}
\item[Nationwide] 878
\item[Rural] 606.25
\item[Urban] 1873.25
\end{description*}


    
    \hypertarget{ux76f4ux65b9ux56fe}{%
\subsection{(3) 直方图}\label{ux76f4ux65b9ux56fe}}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{19}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{histogram} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{x}\PY{p}{,} \PY{n}{xname}\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{x\PYZdq{}}\PY{p}{)} \PY{p}{\PYZob{}}
    \PY{n+nf}{hist}\PY{p}{(}\PY{n}{x}\PY{p}{,} \PY{n}{prob}\PY{o}{=}\PY{k+kc}{TRUE}\PY{p}{,} \PY{n}{main}\PY{o}{=}\PY{n+nf}{paste}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{Histogram of\PYZdq{}} \PY{p}{,} \PY{n}{xname}\PY{p}{)}\PY{p}{)}
    \PY{n+nf}{lines}\PY{p}{(}\PY{n+nf}{density}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{)}
    \PY{n+nf}{rug}\PY{p}{(}\PY{n}{x}\PY{p}{)} \PY{c+c1}{\PYZsh{} show the actual data points}
\PY{p}{\PYZcb{}}
\end{Verbatim}
\end{tcolorbox}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{20}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c+c1}{\PYZsh{} layout(matrix(c(1,2,3), nr=1, byrow=T))}
\PY{n+nf}{histogram}\PY{p}{(}\PY{n}{Nationwide}\PY{p}{,} \PY{n}{xname}\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{Nationwide\PYZdq{}}\PY{p}{)}
\PY{n+nf}{histogram}\PY{p}{(}\PY{n}{Rural}\PY{p}{,} \PY{n}{xname}\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{Rural\PYZdq{}}\PY{p}{)}
\PY{n+nf}{histogram}\PY{p}{(}\PY{n}{Urban}\PY{p}{,} \PY{n}{xname}\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{Urban\PYZdq{}}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\begin{center}
\begin{tabular}{rcl}
    \adjustimage{max size={0.3\linewidth}{0.3\paperheight}}{ex_1_3/output_38_0.png}
    &
    \adjustimage{max size={0.3\linewidth}{0.3\paperheight}}{ex_1_3/output_38_1.png}
    &
    \adjustimage{max size={0.3\linewidth}{0.3\paperheight}}{ex_1_3/output_38_2.png}
\end{tabular}
\end{center}


    % \begin{center}
    % \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_1_3/output_38_0.png}
    % \end{center}
    % % { \hspace*{\fill} \\}
    
    % \begin{center}
    % \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_1_3/output_38_1.png}
    % \end{center}
    % % { \hspace*{\fill} \\}
    
    % \begin{center}
    % \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_1_3/output_38_2.png}
    % \end{center}
    % % { \hspace*{\fill} \\}
    
    \hypertarget{ux830eux53f6ux56fe}{%
\subsection{(4) 茎叶图}\label{ux830eux53f6ux56fe}}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{21}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{stem}\PY{p}{(}\PY{n}{Nationwide}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{Verbatim}[commandchars=\\\{\}]

  The decimal point is 3 digit(s) to the right of the |

  0 | 22233344567889
  1 | 137
  2 | 368
  3 | 02

    \end{Verbatim}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{22}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{stem}\PY{p}{(}\PY{n}{Rural}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{Verbatim}[commandchars=\\\{\}]

  The decimal point is 3 digit(s) to the right of the |

  0 | 1222223344
  0 | 566679
  1 | 14
  1 | 899
  2 | 0

    \end{Verbatim}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{23}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{stem}\PY{p}{(}\PY{n}{Urban}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{Verbatim}[commandchars=\\\{\}]

  The decimal point is 3 digit(s) to the right of the |

  0 | 44566678914679
  2 | 409
  4 | 948
  6 | 27

    \end{Verbatim}

    \hypertarget{ux5f02ux5e38ux503c}{%
\subsection{(5) 异常值}\label{ux5f02ux5e38ux503c}}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{24}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{abnormal} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{x}\PY{p}{)} \PY{p}{\PYZob{}}
    \PY{n}{fn} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{fivenum}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{;}
    \PY{n}{Q1} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{fn}\PY{p}{[}\PY{l+m}{2}\PY{p}{]}\PY{p}{;}  \PY{n}{Q3} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{fn}\PY{p}{[}\PY{l+m}{4}\PY{p}{]}\PY{p}{;}
    \PY{n}{R1} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{Q3} \PY{o}{\PYZhy{}} \PY{n}{Q1}
    \PY{n}{QD} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{Q1} \PY{o}{\PYZhy{}} \PY{l+m}{1.5} \PY{o}{*} \PY{n}{R1}
    \PY{n}{QU} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{Q3} \PY{o}{+} \PY{l+m}{1.5} \PY{o}{*} \PY{n}{R1}
    \PY{n}{x}\PY{p}{[}\PY{p}{(}\PY{n}{x} \PY{o}{\PYZlt{}} \PY{n}{QD}\PY{p}{)} \PY{o}{|} \PY{p}{(}\PY{n}{x} \PY{o}{\PYZgt{}} \PY{n}{QU}\PY{p}{)}\PY{p}{]}
\PY{p}{\PYZcb{}}
\end{Verbatim}
\end{tcolorbox}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{25}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{apply}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{,} \PY{l+m}{2}\PY{p}{,} \PY{n}{abnormal}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    

    
    结果为空：没有异常值。

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{26}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{detach}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% - MARK: 1.4
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



    
    \hypertarget{ux4e60ux9898-1.4}{%
\section{习题 1.4}\label{ux4e60ux9898-1.4}}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{1}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{data} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{read.csv}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{./ex\PYZus{}1\PYZus{}4.csv\PYZdq{}}\PY{p}{)}
\PY{n+nf}{cat}\PY{p}{(}\PY{n+nf}{names}\PY{p}{(}\PY{n}{data}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{Verbatim}[commandchars=\\\{\}]
X.序号 省市区 X11月 X1.11月
    \end{Verbatim}

    设「11月」为 \(X_1\), 「1～11月」为 \(X_2\)：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{2}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{data} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}  \PY{c+c1}{\PYZsh{} remove \PYZdq{}序号\PYZdq{} col}
\PY{n+nf}{names}\PY{p}{(}\PY{n}{data}\PY{p}{)} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{c}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{Province\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{X1\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{X2\PYZdq{}}\PY{p}{)}
\PY{n}{data}
\end{Verbatim}
\end{tcolorbox}

    A data.frame: 31 × 3
\begin{tabular}{lll}
 Province & X1 & X2\\
 <chr> & <dbl> & <dbl>\\
\hline
	 北京       & 35.22 &  499.80\\
	 天津       & 10.41 &  161.37\\
	 河北       & 17.22 &  273.29\\
     \vdots    &  \vdots & \vdots \\
	 青海       &  1.21 &   18.30\\
	 宁夏       &  2.31 &   23.81\\
	 新疆       &  3.24 &  103.81\\
\end{tabular}


    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{3}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{attach}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \hypertarget{ux5747ux503cux65b9ux5deeux6807ux51c6ux5deeux53d8ux5f02ux7cfbux6570ux504fux5ea6ux5cf0ux5ea6}{%
\subsection{(1)
均值、方差、标准差、变异系数、偏度、峰度}\label{ux5747ux503cux65b9ux5deeux6807ux51c6ux5deeux53d8ux5f02ux7cfbux6570ux504fux5ea6ux5cf0ux5ea6}}

    调用库完成：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{4}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{library}\PY{p}{(}\PY{n}{psych}\PY{p}{)}
\PY{n+nf}{describe}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{,} \PY{n}{type}\PY{o}{=}\PY{l+m}{2}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\resizebox{\textwidth}{15mm}{
\begin{tabular}{r|lllllllllllll}
  & vars & n & mean & sd & median & trimmed & mad & min & max & range & skew & kurtosis & se\\
  & <int> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl>\\
\hline
	X1 & 1 & 31 &  19.16645 &  19.79977 &  14.77 &  15.7252 &   8.258082 & 0.77 &   99.32 &   98.55 & 2.515352 & 8.266989 &  3.556143\\
	X2 & 2 & 31 & 246.19323 & 232.97210 & 179.41 & 210.6356 & 123.856404 & 6.08 & 1080.26 & 1074.18 & 1.915957 & 4.385233 & 41.843024\\
\end{tabular}}


    
    手动实现：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{5}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{describes} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{df}\PY{p}{)} \PY{p}{\PYZob{}}
    \PY{n}{cv} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{x}\PY{p}{)} \PY{n+nf}{sd}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{o}{/}\PY{n+nf}{mean}\PY{p}{(}\PY{n}{x}\PY{p}{)}  \PY{c+c1}{\PYZsh{} 变异系数}
    \PY{n}{g1} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{x}\PY{p}{)} \PY{p}{\PYZob{}}  \PY{c+c1}{\PYZsh{} 偏度}
        \PY{n}{n} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{length}\PY{p}{(}\PY{n}{x}\PY{p}{)}
        \PY{n}{A} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{n} \PY{o}{/} \PY{p}{(}\PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}1}\PY{p}{)} \PY{o}{*} \PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}2}\PY{p}{)}\PY{p}{)}
        \PY{n}{B} \PY{o}{\PYZlt{}\PYZhy{}} \PY{l+m}{1} \PY{o}{/} \PY{n+nf}{sd}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{o}{\PYZca{}}\PY{l+m}{3}
        \PY{n}{S} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{sum}\PY{p}{(}\PY{p}{(}\PY{n}{x} \PY{o}{\PYZhy{}} \PY{n+nf}{mean}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{)}\PY{o}{\PYZca{}}\PY{l+m}{3}\PY{p}{)}
        \PY{n}{A} \PY{o}{*} \PY{n}{B} \PY{o}{*} \PY{n}{S}
    \PY{p}{\PYZcb{}}
    \PY{n}{g2} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{x}\PY{p}{)} \PY{p}{\PYZob{}}  \PY{c+c1}{\PYZsh{} 峰度}
        \PY{n}{n} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{length}\PY{p}{(}\PY{n}{x}\PY{p}{)}
        \PY{n}{A} \PY{o}{\PYZlt{}\PYZhy{}} \PY{p}{(}\PY{n}{n} \PY{o}{*} \PY{p}{(}\PY{n}{n}\PY{l+m}{+1}\PY{p}{)}\PY{p}{)} \PY{o}{/} \PY{p}{(}\PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}1}\PY{p}{)} \PY{o}{*} \PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}2}\PY{p}{)} \PY{o}{*} \PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}3}\PY{p}{)}\PY{p}{)}
        \PY{n}{B} \PY{o}{\PYZlt{}\PYZhy{}} \PY{l+m}{1} \PY{o}{/} \PY{n+nf}{sd}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{o}{\PYZca{}}\PY{l+m}{4}
        \PY{n}{S} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{sum}\PY{p}{(}\PY{p}{(}\PY{n}{x} \PY{o}{\PYZhy{}} \PY{n+nf}{mean}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{)}\PY{o}{\PYZca{}}\PY{l+m}{4}\PY{p}{)}
        \PY{n}{C} \PY{o}{\PYZlt{}\PYZhy{}} \PY{p}{(}\PY{l+m}{3} \PY{o}{*} \PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}1}\PY{p}{)}\PY{o}{\PYZca{}}\PY{l+m}{2}\PY{p}{)} \PY{o}{/} \PY{p}{(}\PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}2}\PY{p}{)} \PY{o}{*} \PY{p}{(}\PY{n}{n}\PY{l+m}{\PYZhy{}3}\PY{p}{)}\PY{p}{)}
        \PY{n}{A} \PY{o}{*} \PY{n}{B} \PY{o}{*} \PY{n}{S} \PY{o}{\PYZhy{}} \PY{n}{C}
    \PY{p}{\PYZcb{}}
    \PY{n}{itm} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{matrix}\PY{p}{(}\PY{n+nf}{c}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{均值\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{方差\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{标准差\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{变异系数\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{偏度\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{峰度\PYZdq{}}\PY{p}{)}\PY{p}{,} \PY{l+m}{6}\PY{p}{,} \PY{l+m}{1}\PY{p}{)}
    \PY{n}{res} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{apply}\PY{p}{(}\PY{n}{df}\PY{p}{,} \PY{l+m}{2}\PY{p}{,} 
                 \PY{n+nf}{function}\PY{p}{(}\PY{n}{x}\PY{p}{)} \PY{n+nf}{c}\PY{p}{(}\PY{n+nf}{mean}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{,} \PY{n+nf}{var}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{,} \PY{n+nf}{sd}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{,} \PY{n+nf}{cv}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{,} \PY{n+nf}{g1}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{,} \PY{n+nf}{g2}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{)}\PY{p}{)}
    \PY{n+nf}{cbind}\PY{p}{(}\PY{n}{itm}\PY{p}{,} \PY{n}{res}\PY{p}{)}
\PY{p}{\PYZcb{}}
\end{Verbatim}
\end{tcolorbox}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{6}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{describes}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    A matrix: 6 × 3 of type chr
\begin{tabular}{lll}
  & X1 & X2\\
\hline
	 均值             & 19.1664516129032 & 246.193225806452 \\
	 方差             & 392.030750322581 & 54275.9982492473 \\
	 标准差       & 19.7997664209096 & 232.972097576614 \\
	 变异系数 & 1.0330428824697  & 0.946297757842323\\
	 偏度             & 2.51535182567297 & 1.91595698889706 \\
	 峰度             & 8.26698939080861 & 4.38523271345801 \\
\end{tabular}


    
    \hypertarget{ux4e2dux4f4dux6570ux4e0aux4e0bux56dbux5206ux4f4dux6570ux56dbux5206ux4f4dux6781ux5dee}{%
\subsection{(2)
中位数、上下四分位数、四分位极差}\label{ux4e2dux4f4dux6570ux4e0aux4e0bux56dbux5206ux4f4dux6570ux56dbux5206ux4f4dux6781ux5dee}}

    五数：minimum, lower-hinge, median, upper-hinge, maximum

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{7}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{fn} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{apply}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{]}\PY{p}{,} \PY{l+m}{2}\PY{p}{,} \PY{n}{fivenum}\PY{p}{)}
\PY{n}{fn}
\end{Verbatim}
\end{tcolorbox}

    A matrix: 5 × 2 of type dbl
\begin{tabular}{ll}
 X1 & X2\\
\hline
	  0.770 &    6.080\\
	  8.265 &  105.350\\
	 14.770 &  179.410\\
	 20.080 &  270.745\\
	 99.320 & 1080.260\\
\end{tabular}


    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{8}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c+c1}{\PYZsh{} 四分位极差}
\PY{n}{R1} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{Q3}\PY{p}{,} \PY{n}{Q1}\PY{p}{)} \PY{n}{Q3} \PY{o}{\PYZhy{}} \PY{n}{Q1}
\PY{n+nf}{R1}\PY{p}{(}\PY{n}{Q3}\PY{o}{=}\PY{n}{fn}\PY{p}{[}\PY{l+m}{4}\PY{p}{,}\PY{p}{]}\PY{p}{,} \PY{n}{Q1}\PY{o}{=}\PY{n}{fn}\PY{p}{[}\PY{l+m}{2}\PY{p}{,}\PY{p}{]}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{description*}
\item[X1] 11.815
\\
\item[X2] 165.395
\end{description*}


    
    \hypertarget{ux76f4ux65b9ux56fe-ex1-4}{%
\subsection{(3) 直方图}\label{ux76f4ux65b9ux56fe-ex1-4}}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{9}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{histogram} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{x}\PY{p}{,} \PY{n}{xname}\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{x\PYZdq{}}\PY{p}{)} \PY{p}{\PYZob{}}
    \PY{n+nf}{hist}\PY{p}{(}\PY{n}{x}\PY{p}{,} \PY{n}{prob}\PY{o}{=}\PY{k+kc}{TRUE}\PY{p}{,} \PY{n}{main}\PY{o}{=}\PY{n+nf}{paste}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{Histogram of\PYZdq{}} \PY{p}{,} \PY{n}{xname}\PY{p}{)}\PY{p}{)}
    \PY{n+nf}{lines}\PY{p}{(}\PY{n+nf}{density}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{)}
    \PY{n+nf}{rug}\PY{p}{(}\PY{n}{x}\PY{p}{)} \PY{c+c1}{\PYZsh{} show the actual data points}
\PY{p}{\PYZcb{}}
\end{Verbatim}
\end{tcolorbox}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{10}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{histogram}\PY{p}{(}\PY{n}{X1}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{X1\PYZdq{}}\PY{p}{)}
\PY{n+nf}{histogram}\PY{p}{(}\PY{n}{X2}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{X2\PYZdq{}}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\begin{center}
\begin{tabular}{rl}
    \adjustimage{max size={0.4\linewidth}{0.4\paperheight}}{ex_1_4/output_17_0.png}
    &
    \adjustimage{max size={0.4\linewidth}{0.4\paperheight}}{ex_1_4/output_17_1.png}
\end{tabular}
\end{center}

    % \begin{center}
    % \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_1_4/output_17_0.png}
    % \end{center}
    % % { \hspace*{\fill} \\}
    
    % \begin{center}
    % \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_1_4/output_17_1.png}
    % \end{center}
    % % { \hspace*{\fill} \\}
    
    \hypertarget{ux7ecfux9a8cux5206ux5e03ux51fdux6570ux56fe}{%
\subsection{(4)
经验分布函数图}\label{ux7ecfux9a8cux5206ux5e03ux51fdux6570ux56fe}}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{11}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{plot\PYZus{}ecdf} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{x}\PY{p}{,} \PY{n}{xname}\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{x\PYZdq{}}\PY{p}{)} \PY{p}{\PYZob{}}
    \PY{n+nf}{plot}\PY{p}{(}\PY{n+nf}{ecdf}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{,} \PY{n}{do.points}\PY{o}{=}\PY{k+kc}{FALSE}\PY{p}{,} \PY{n}{verticals}\PY{o}{=}\PY{k+kc}{TRUE}\PY{p}{,} \PY{n}{main}\PY{o}{=}\PY{n+nf}{paste}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{ecdf(\PYZdq{}} \PY{p}{,} \PY{n}{xname}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{)\PYZdq{}}\PY{p}{)}\PY{p}{)}
    \PY{n}{xs} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{seq}\PY{p}{(}\PY{n+nf}{min}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{,} \PY{n+nf}{max}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{,} \PY{l+m}{1}\PY{o}{/}\PY{n+nf}{sqrt}\PY{p}{(}\PY{n+nf}{length}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{)}\PY{p}{)}
    \PY{n+nf}{lines}\PY{p}{(}\PY{n}{xs}\PY{p}{,} \PY{n+nf}{pnorm}\PY{p}{(}\PY{n}{xs}\PY{p}{,} \PY{n}{mean}\PY{o}{=}\PY{n+nf}{mean}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{,} \PY{n}{sd}\PY{o}{=}\PY{n+nf}{sd}\PY{p}{(}\PY{n}{x}\PY{p}{)}\PY{p}{)}\PY{p}{,} \PY{n}{lty}\PY{o}{=}\PY{l+m}{3}\PY{p}{,} \PY{n}{col}\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{red\PYZdq{}}\PY{p}{)}
\PY{p}{\PYZcb{}}
\end{Verbatim}
\end{tcolorbox}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{12}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{plot\PYZus{}ecdf}\PY{p}{(}\PY{n}{X1}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{X1\PYZdq{}}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{13}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{plot\PYZus{}ecdf}\PY{p}{(}\PY{n}{X2}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{X2\PYZdq{}}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\begin{center}
\begin{tabular}{rl}
    \adjustimage{max size={0.4\linewidth}{0.4\paperheight}}{ex_1_4/output_20_0.png}
    &
    \adjustimage{max size={0.4\linewidth}{0.4\paperheight}}{ex_1_4/output_21_0.png}
\end{tabular}
\end{center}
    % \begin{center}
    % \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_1_4/output_20_0.png}
    % \end{center}
    % % { \hspace*{\fill} \\}
    
    % \begin{center}
    % \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_1_4/output_21_0.png}
    % \end{center}
    % % { \hspace*{\fill} \\}
    
    正态QQ图：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{14}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{qqnorm}\PY{p}{(}\PY{n}{X1}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}
    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{15}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{qqnorm}\PY{p}{(}\PY{n}{X2}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

\begin{center}
\begin{tabular}{rl}
    \adjustimage{max size={0.4\linewidth}{0.4\paperheight}}{ex_1_4/output_23_0.png}
    &
    \adjustimage{max size={0.4\linewidth}{0.4\paperheight}}{ex_1_4/output_24_0.png}
\end{tabular}
\end{center}

    % \begin{center}
    % \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_1_4/output_23_0.png}
    % \end{center}
    % % { \hspace*{\fill} \\}

    % \begin{center}
    % \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_1_4/output_24_0.png}
    % \end{center}
    % % { \hspace*{\fill} \\}
    
    \hypertarget{x_1x_2-ux7684-pearson-ux76f8ux5173ux7cfbux6570ux4e0e-spearman-ux76f8ux5173ux7cfbux6570}{%
\subsection{\texorpdfstring{(5) \(X_1\)、\(X_2\) 的 Pearson
相关系数与 Spearman
相关系数}{(5) X\_1、X\_2 的 Pearson 相关系数与 Spearman 相关系数}}\label{x_1x_2-ux7684-pearson-ux76f8ux5173ux7cfbux6570ux4e0e-spearman-ux76f8ux5173ux7cfbux6570}}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{16}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{cor.test}\PY{p}{(}\PY{n}{X1}\PY{p}{,} \PY{n}{X2}\PY{p}{,} \PY{n}{method}\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{pearson\PYZdq{}}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

	Pearson's product-moment correlation

data:  X1 and X2
t = 24.265, df = 29, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.9508176 0.9886055
sample estimates:
      cor 
0.9762474 

    \end{Verbatim}

    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{17}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{cor.test}\PY{p}{(}\PY{n}{X1}\PY{p}{,} \PY{n}{X2}\PY{p}{,} \PY{n}{method}\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{spearman\PYZdq{}}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

	Spearman's rank correlation rho

data:  X1 and X2
S = 358, p-value = 9.781e-09
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.9278226 

    \end{Verbatim}

    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{18}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{detach}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% - MARK: 2.4
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%    



    
    \hypertarget{ux4e60ux9898-2.4}{%
\section{习题 2.4}\label{ux4e60ux9898-2.4}}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{1}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{data} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{read.table}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{./ex\PYZus{}2\PYZus{}4.txt\PYZdq{}}\PY{p}{,} \PY{n}{header}\PY{o}{=}\PY{k+kc}{TRUE}\PY{p}{); }\PY{n}{data}
\end{Verbatim}
\end{tcolorbox}

    A data.frame: 15 × 3
\begin{tabular}{lll}
 Y & X1 & X2\\
 <int> & <int> & <int>\\
\hline
	 162 & 274 & 2450\\
	 120 & 180 & 3254\\
	 223 & 375 & 3802\\
     \vdots & \vdots & \vdots \\
	 144 & 236 & 2660\\
	 103 & 157 & 2088\\
	 212 & 370 & 2605\\
\end{tabular}


    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{2}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{attach}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    回归之前应该先看一下变量之间的相关关系如何，可以借助\emph{散点图矩阵}来实现。

散点图矩阵（scatterplot matrix）：
每个行与列的交叉点所在的散点图表示其所在的行与列的两个变量的相关关系：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{3}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{pairs}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_2_4/output_4_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    用 psych 包里的 \texttt{pairs.panels} 可以作出有更多信息的图：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{4}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{library}\PY{p}{(}\PY{n}{psych}\PY{p}{)}
\PY{n+nf}{pairs.panels}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_2_4/output_6_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    \begin{itemize}
\tightlist
\item
  对角线上方：相关系数
\item
  对角线：每个特征的数值分布直方图
\item
  下方：散点图

  \begin{itemize}
  \tightlist
  \item
    相关椭圆：

    \begin{itemize}
    \tightlist
    \item
      中心点：两个变量的均值所确定的点
    \item
      椭圆形状：两个变量的相关性：椭圆越被拉伸，其相关性越强
    \end{itemize}
  \item
    局部回归平滑曲线：x轴和y轴变量之间的一般关系
  \end{itemize}
\end{itemize}

    假设 \(Y\) 与 \(X_1\), \(X_2\) 之间满足线性回归关系

\[
y_i=\beta_0+\beta_1x_{i1}+\beta_2x_{i2}+\epsilon_i,\quad i=1,2,\cdots,15
\]

其中 \(\epsilon_i\ (i=1,2,\cdots,15)\) 独立同分布于 \(N(0,\sigma^2)\).

    \hypertarget{ex2-4-part1}{%
\subsection{(1)}\label{ex2-4-part1}}

求回归系数 \(\beta_0,\beta_1,\beta_2\) 的最小二乘估计和误差方差
\(\sigma^2\) 的估计， 写出回归方程并对回归系数作解释

    回归：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{5}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{fm} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{lm}\PY{p}{(}\PY{n}{Y} \PY{o}{\PYZti{}} \PY{n}{X1} \PY{o}{+} \PY{n}{X2}\PY{p}{,} \PY{n}{data}\PY{p}{)}
\PY{n+nf}{summary}\PY{p}{(}\PY{n}{fm}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

Call:
lm(formula = Y \textasciitilde{} X1 + X2, data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.8320 -1.2044 -0.2406  1.4888  3.3092 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 3.4526128  2.4306505   1.420    0.181    
X1          0.4960050  0.0060544  81.924  < 2e-16 ***
X2          0.0091991  0.0009681   9.502  6.2e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.177 on 12 degrees of freedom
Multiple R-squared:  0.9989,	Adjusted R-squared:  0.9988 
F-statistic:  5679 on 2 and 12 DF,  p-value: < 2.2e-16

    \end{Verbatim}

    
    Coefficients 是回归得到的系数，由此得到回归方程：

\[
\hat Y = 0.496005 X_1 + 0.009199 X_2 + 3.452613
\]

Residual standard error 即残差标准差的估计，\(\hat\sigma=2.177\),
由此得到误差方差的估计：

\[
\hat\sigma^2=2.177^2\approx 4.74
\]

    \hypertarget{ex2-4-part2}{%
\subsection{(2)}\label{ex2-4-part2}}

求出方差分析表，解释对线性回归关系显著性检验的结果。求复相关系数的平方
\(R^2\) 的值并解释其意义

模型方差分析表:

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{6}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{anova}\PY{p}{(}\PY{n}{fm}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    A anova: 3 × 5
\begin{tabular}{r|lllll}
  & Df & Sum Sq & Mean Sq & F value & Pr(>F)\\
  & <int> & <dbl> & <dbl> & <dbl> & <dbl>\\
\hline
	X1 &  1 & 53416.71863 & 53416.718634 & 11268.64354 & 3.270351e-19\\
	X2 &  1 &   427.99780 &   427.997800 &    90.28923 & 6.201181e-07\\
	Residuals & 12 &    56.88357 &     4.740297 &          NA &           NA\\
\end{tabular}


    
    aov 输出更友好，可以直接显示出结果：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{7}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{summary}\PY{p}{(}\PY{n+nf}{aov}\PY{p}{(}\PY{n}{fm}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]
            Df Sum Sq Mean Sq  F value  Pr(>F)    
X1           1  53417   53417 11268.64 < 2e-16 ***
X2           1    428     428    90.29 6.2e-07 ***
Residuals   12     57       5                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    \end{Verbatim}

    
    \(R^2\) 可以由 summary(fm) 输出的 \texttt{Multiple\ R-squared} 看出：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{8}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{summary}\PY{p}{(}\PY{n}{fm}\PY{p}{)}\PY{o}{\PYZdl{}}\PY{n}{r.squared}
\end{Verbatim}
\end{tcolorbox}

    0.998944677605876

    
    \(\therefore R^2=0.9989\)

    \hypertarget{ex2-4-part3}{%
\subsection{(3)}\label{ex2-4-part3}}

分别求 \(\beta_1\) 和 \(\beta_2\) 的置信度为 \(95\%\) 的置信区间

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{9}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c+c1}{\PYZsh{} 模型参数的置信区间}
\PY{n+nf}{confint}\PY{p}{(}\PY{n}{fm}\PY{p}{,} \PY{n}{level} \PY{o}{=} \PY{l+m}{0.95}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    A matrix: 3 × 2 of type dbl
\begin{tabular}{r|ll}
  & 2.5 \% & 97.5 \%\\
\hline
	(Intercept) & -1.843319690 & 8.74854527\\
	X1 &  0.482813482 & 0.50919647\\
	X2 &  0.007089742 & 0.01130842\\
\end{tabular}


    
    得到所求置信区间：

\[
\begin{aligned}
\beta_1:&& (0.482813482, 0.50919647)\\
\beta_2:&& (0.007089742, 0.01130842)
\end{aligned}
\]

    \hypertarget{ex2-4-part4}{%
\subsection{(4)}\label{ex2-4-part4}}

对 \(\alpha=0.05\)，分别检验人数 \(X_1\) 和收入 \(X_2\) 对销量 \(Y\)
的影响是否显著， 利用回归系数有关的一般假设检验方法检验 \(X_1\) 和
\(X_2\) 的交互作用 (\(X_1X_2\)) 对 \(Y\) 的影响是否显著。

    在 \texttt{summary(fm)} 输出的 Coefficients 段即可看出 \(X_1\)、\(X_2\)
对 \(Y\) 影响的显著性：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{10}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{summary}\PY{p}{(}\PY{n}{fm}\PY{p}{)}\PY{c+c1}{\PYZsh{}\PYZdl{}coefficients}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

\dots

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 3.4526128  2.4306505   1.420    0.181    
X1          0.4960050  0.0060544  81.924  < 2e-16 ***
X2          0.0091991  0.0009681   9.502  6.2e-07 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

    \end{Verbatim}

    
    这一段输出里有:

\begin{itemize}
\tightlist
\item
  \texttt{t\ value}: T 检验的值
\item
  \texttt{Pr(\textgreater{}\textbar{}t\textbar{})}: 表示 T 检验判定 P
  值，后面有显著性标记（\texttt{*}号）
\item
  显著性标记：\texttt{*} 个数对应显著性水平。
\end{itemize}

这里 \(X_1\)、\(X_2\) 都是有 \texttt{***} 的，或者看 P 都在 0.0001
以下， 所以对于 \(\alpha=0.05\)，认为人数 \(X_1\) 和收入 \(X_2\) 对销量
\(Y\) 的影响显著。

    注：这里可以做约简模型，求\(SSE(R)\)、\(SSE(T)\)\ldots{}
去分析，下面的函数会有帮助：（由于麻烦，这里不采用这种方法。）

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{11}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{sse} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{model}\PY{p}{,} \PY{n}{y}\PY{p}{)} \PY{n+nf}{sum}\PY{p}{(}\PY{p}{(}\PY{n+nf}{fitted}\PY{p}{(}\PY{n}{model}\PY{p}{)} \PY{o}{\PYZhy{}} \PY{n}{y}\PY{p}{)}\PY{o}{\PYZca{}}\PY{l+m}{2}\PY{p}{)}
\PY{n}{ssr} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{model}\PY{p}{,} \PY{n}{y}\PY{p}{)} \PY{n+nf}{sum}\PY{p}{(}\PY{p}{(}\PY{n+nf}{fitted}\PY{p}{(}\PY{n}{model}\PY{p}{)} \PY{o}{\PYZhy{}} \PY{n+nf}{mean}\PY{p}{(}\PY{n}{y}\PY{p}{)}\PY{p}{)}\PY{o}{\PYZca{}}\PY{l+m}{2}\PY{p}{)}
\PY{n}{sst} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{model}\PY{p}{,} \PY{n}{y}\PY{p}{)} \PY{n+nf}{ssr}\PY{p}{(}\PY{n}{model}\PY{p}{,} \PY{n}{y}\PY{p}{)} \PY{o}{+} \PY{n+nf}{sse}\PY{p}{(}\PY{n}{model}\PY{p}{,} \PY{n}{y}\PY{p}{)}
\PY{n}{ss} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{model}\PY{p}{,} \PY{n}{y}\PY{p}{)} \PY{p}{\PYZob{}}
    \PY{n+nf}{cbind}\PY{p}{(}\PY{n+nf}{matrix}\PY{p}{(}\PY{n+nf}{c}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{sse\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{ssr\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{sst\PYZdq{}}\PY{p}{)}\PY{p}{,} \PY{l+m}{3}\PY{p}{,} \PY{l+m}{1}\PY{p}{)}\PY{p}{,} 
          \PY{n+nf}{c}\PY{p}{(}\PY{n+nf}{sse}\PY{p}{(}\PY{n}{model}\PY{p}{,} \PY{n}{y}\PY{p}{)}\PY{p}{,} \PY{n+nf}{ssr}\PY{p}{(}\PY{n}{model}\PY{p}{,} \PY{n}{y}\PY{p}{)}\PY{p}{,} \PY{n+nf}{sst}\PY{p}{(}\PY{n}{model}\PY{p}{,} \PY{n}{y}\PY{p}{)}\PY{p}{)}\PY{p}{)}
    \PY{p}{\PYZcb{}}
\PY{c+c1}{\PYZsh{} R\PYZca{}2}
\PY{c+c1}{\PYZsh{} ssr(fm, Y)/sst(fm, Y)}
\end{Verbatim}
\end{tcolorbox}

    下面研究 \(X_1\)、\(X_2\) 交互作用对 Y 的影响：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{12}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{fm1} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{update}\PY{p}{(}\PY{n}{fm}\PY{p}{,} \PY{n}{.} \PY{o}{\PYZti{}} \PY{n}{.} \PY{o}{+} \PY{n}{X1}\PY{o}{:}\PY{n}{X2}\PY{p}{)}
\PY{n+nf}{summary}\PY{p}{(}\PY{n}{fm1}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

Call:
lm(formula = Y \textasciitilde{} X1 + X2 + X1:X2, data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.9094 -1.2010 -0.1811  1.5072  3.2141 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 4.901e+00  8.539e+00   0.574    0.578    
X1          4.911e-01  2.832e-02  17.344 2.45e-09 ***
X2          8.674e-03  3.124e-03   2.777    0.018 *  
X1:X2       1.698e-06  9.556e-06   0.178    0.862    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.271 on 11 degrees of freedom
Multiple R-squared:  0.9989,	Adjusted R-squared:  0.9987 
F-statistic:  3481 on 3 and 11 DF,  p-value: < 2.2e-16

    \end{Verbatim}

    
    从拟合的结果里，\texttt{X1:X2} 的 P-value=0.862 比较大，说明交互作用对 Y
的影响不显著，没有必要引入交叉项。

    \hypertarget{section}{%
\subsection{(5)}\label{section}}

\includegraphics{ex_2_4/008i3skNly1gr3yf8l5dzj30mh01taai.jpg}

    新数据：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{13}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{x01} \PY{o}{=} \PY{l+m}{220}
\PY{n}{x02} \PY{o}{=} \PY{l+m}{2500}
\PY{n}{newdata} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{data.frame}\PY{p}{(}\PY{n}{X1} \PY{o}{=} \PY{n+nf}{c}\PY{p}{(}\PY{n}{x01}\PY{p}{)}\PY{p}{,} \PY{n}{X2} \PY{o}{=} \PY{n+nf}{c}\PY{p}{(}\PY{n}{x02}\PY{p}{)}\PY{p}{)}
\PY{n}{newdata}
\end{Verbatim}
\end{tcolorbox}

    A data.frame: 1 × 2
\begin{tabular}{ll}
 X1 & X2\\
 <dbl> & <dbl>\\
\hline
	 220 & 2500\\
\end{tabular}


    
    代入模型进行预测：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{14}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{predict}\PY{p}{(}\PY{n}{fm}\PY{p}{,} \PY{n}{newdata}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \textbf{1:} 135.571409702671

    
    要获取置信区间需要再传入几个参数：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{15}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{predict}\PY{p}{(}\PY{n}{fm}\PY{p}{,} \PY{n}{newdata}\PY{p}{,} \PY{n}{interval}\PY{o}{=}\PY{l+s}{\PYZdq{}}\PY{l+s}{prediction\PYZdq{}}\PY{p}{,} \PY{n}{levels}\PY{o}{=}\PY{l+m}{0.95}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    A matrix: 1 × 3 of type dbl
\begin{tabular}{r|lll}
  & fit & lwr & upr\\
\hline
	1 & 135.5714 & 130.5998 & 140.543\\
\end{tabular}


    
    即得到预测值 \(\hat y_0 = 135.5714\)，置信度为 \(95\%\) 的预测区间
\((130.5998, 140.543)\)。

    \hypertarget{section}{%
\subsection{(6)}\label{section}}

\includegraphics{ex_2_4/008i3skNly1gr3yz2huq0j30mm02naax.jpg}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{16}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c+c1}{\PYZsh{} 拟合值：}
\PY{n}{fm.fitted} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{fitted}\PY{p}{(}\PY{n}{fm}\PY{p}{)}
\PY{c+c1}{\PYZsh{} 残差：}
\PY{n}{fm.residuals} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{residuals}\PY{p}{(}\PY{n}{fm}\PY{p}{)}
\PY{c+c1}{\PYZsh{} 学生化残差：}
\PY{n}{fm.rstudent} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{rstudent}\PY{p}{(}\PY{n}{fm}\PY{p}{)}
\PY{n}{fmr} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{data.frame}\PY{p}{(}
    \PY{n}{`拟合值`}\PY{o}{=}\PY{n}{fm.fitted}\PY{p}{,} 
    \PY{n}{`残差`}\PY{o}{=}\PY{n}{fm.residuals}\PY{p}{,} 
    \PY{n}{`学生化残差`}\PY{o}{=}\PY{n}{fm.rstudent}
\PY{p}{)}\PY{p}{;} \PY{n}{fmr}
\end{Verbatim}
\end{tcolorbox}

    A data.frame: 15 × 3
\begin{tabular}{r|lll}
  & 拟合值 & 残差 & 学生化残差\\
  & <dbl> & <dbl> & <dbl>\\
\hline
	1 & 161.89572 &  0.1042756 &  0.04973473\\
	2 & 122.66732 & -2.6673176 & -1.36670170\\
	3 & 224.42938 & -1.4293843 & -0.71265041\\
	4 & 131.24062 & -0.2406244 & -0.11000544\\
	5 &  67.69928 & -0.6992835 & -0.34443368\\
	6 & 169.68486 & -0.6848553 & -0.33365032\\
	7 &  79.73194 &  1.2680643 &  0.65018013\\
	8 & 189.67200 &  2.3279970 &  1.25776219\\
	9 & 119.83202 & -3.8320189 & -2.21655274\\
	10 &  53.29052 &  1.7094765 &  0.91079624\\
	11 & 253.71506 & -1.7150576 & -0.92397237\\
	12 & 228.69079 &  3.3092051 &  2.16085046\\
	13 & 144.97934 & -0.9793423 & -0.45379850\\
	14 & 100.53307 &  2.4669251 &  1.27498091\\
	15 & 210.93806 &  1.0619404 &  0.55945704\\
\end{tabular}


    
    对于学生化残差用频率检验法：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{17}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{hist}\PY{p}{(}\PY{n}{fm.rstudent}\PY{p}{)}
\PY{n+nf}{rug}\PY{p}{(}\PY{n}{fm.rstudent}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_2_4/output_43_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    \begin{itemize}
\tightlist
\item
  有 \(\frac{10}{15}\approx 0.67\) 落在 \((-1.0,1.0)\) 内，
\item
  有 \(\frac{13}{15}\approx 0.87\) 落在 \((-1.5,1.5)\) 内，
\item
  有 \(\frac{15}{15} = 1.00\) 落在 \((-2.4,2.4)\) 内，
\end{itemize}

可见，学生化残差落在上述各区间的频率与 \(N(0,1)\)
分布的相应概率相差不大，所以模型误差项的正态性假设是合理的。

进一步，可以做一个 K-S 检验：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{18}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{ks.test}\PY{p}{(}\PY{n}{fm.rstudent}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{pnorm\PYZdq{}}\PY{p}{,} \PY{l+m}{0}\PY{p}{,} \PY{l+m}{1}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

	One-sample Kolmogorov-Smirnov test

data:  fm.rstudent
D = 0.11208, p-value = 0.9808
alternative hypothesis: two-sided

    \end{Verbatim}

    
    检验结果（\(p=0.9808>0.05\)）也不能拒绝学生化残差服从 \(N(0,1)\) 分布的假设。

    下面作出各种残差图：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{19}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n+nf}{c}\PY{p}{(}\PY{l+m}{2}\PY{p}{,}\PY{l+m}{2}\PY{p}{)}\PY{p}{)}
\PY{n+nf}{plot}\PY{p}{(}\PY{n}{fm}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.8\linewidth}{0.8\paperheight}}{ex_2_4/output_48_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    \begin{itemize}
\tightlist
\item
  左上图：\textbf{残差-拟合}：残差和拟合值之间，数据点均匀分布在y=0两侧，呈现出随机的分布，没有明显的形状特征，说明残差数据表现比较好。
\item
  右上图：\textbf{标准化残差
  Q-Q}：数据点按对角直线排列，趋于一条直线，并被对角直线穿过，直观上符合正态分布。
\item
  左下图：\textbf{标准化残差-拟合}：数据随机分布，与左上图类似，无明显的形状特征。
\item
  右下图：\textbf{标准化残差杠杆图}：可以看出离群点、高杠杆值点和强影响点。（这里不做讨论）
\end{itemize}

由这些图，可以认为相应的线性回归模型以及误差的独立正态分布的假设是合理的。

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{20}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{detach}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% - MARK: 2.5
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%    



    
    \hypertarget{ux4e60ux9898-2.5}{%
\section{习题 2.5}\label{ux4e60ux9898-2.5}}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{1}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{data} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{read.csv}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{./ex\PYZus{}2\PYZus{}5.csv\PYZdq{}}\PY{p}{)}\PY{p}{;} \PY{n}{data}
\end{Verbatim}
\end{tcolorbox}

    A data.frame: 20 × 2
\begin{tabular}{ll}
 x & y\\
 <dbl> & <dbl>\\
\hline
	 0.05 & 5.9421\\
	 0.15 & 5.4691\\
	 0.25 & 5.8724\\
     \vdots & \vdots \\
	 1.75 & 5.6500\\
	 1.85 & 6.0256\\
	 1.95 & 5.5350\\
\end{tabular}


    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{2}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{attach}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \hypertarget{section}{%
\subsection{(1)}\label{section}}

\begin{figure}
\centering
\includegraphics{ex_2_5/008i3skNly1gr5gs7pbp2j31m209stea.jpg}
\caption{题（1）}
\end{figure}

    首先尝试拟合线性回归模型

\[
Y = \beta_0 + \beta_1 X + \epsilon, \quad \epsilon \sim N(0, \sigma^2)
\]

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{3}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{linear\PYZus{}model} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{lm}\PY{p}{(}\PY{n}{y} \PY{o}{\PYZti{}} \PY{n}{x}\PY{p}{,} \PY{n}{data}\PY{p}{)}
\PY{n+nf}{summary}\PY{p}{(}\PY{n}{linear\PYZus{}model}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

Call:
lm(formula = y \textasciitilde{} x, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.72831 -0.17147 -0.08433  0.16405  0.70025 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 5.319696   0.165202  32.201   <2e-16 ***
x           0.003054   0.143114   0.021    0.983    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3691 on 18 degrees of freedom
Multiple R-squared:  2.53e-05,	Adjusted R-squared:  -0.05553 
F-statistic: 0.0004554 on 1 and 18 DF,  p-value: 0.9832

    \end{Verbatim}

    
    从拟合结果的检验可以看出，\(R^2\) 趋于
\(0\)，说明该模型拟合效果不好；同时 \(x\) 对应的 P 值非常高，X 对 Y
没有显著影响。

    方差分析：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{4}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{summary}\PY{p}{(}\PY{n+nf}{aov}\PY{p}{(}\PY{n}{linear\PYZus{}model}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]
            Df Sum Sq Mean Sq F value Pr(>F)
x            1 0.0001 0.00006       0  0.983
Residuals   18 2.4516 0.13620               
    \end{Verbatim}

    
    这也证实了前面的结论。

    下面作出各种残差图：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{5}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n+nf}{c}\PY{p}{(}\PY{l+m}{2}\PY{p}{,}\PY{l+m}{2}\PY{p}{)}\PY{p}{)}
\PY{n+nf}{plot}\PY{p}{(}\PY{n}{linear\PYZus{}model}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.8\linewidth}{0.8\paperheight}}{ex_2_5/output_11_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    可以看出残差还是近似正态分布的。

    下面作出 \(x\)、\(y\) 的散点图：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{6}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{library}\PY{p}{(}\PY{n}{psych}\PY{p}{)}
\PY{n+nf}{pairs.panels}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_2_5/output_14_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    感觉上，\(x\)、\(y\) 呈现二次关系，再尝试作出 \(y\) 与 \(x^2\) 及 \(x\)
的散点图：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{7}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{pairs.panels}\PY{p}{(}\PY{n+nf}{data.frame}\PY{p}{(}\PY{n}{`squared\PYZus{}x`}\PY{o}{=}\PY{n}{x}\PY{o}{\PYZca{}}\PY{l+m}{2}\PY{p}{,} \PY{n}{`x`}\PY{o}{=}\PY{n}{x}\PY{p}{,} \PY{n}{`y`}\PY{o}{=}\PY{n}{y}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_2_5/output_16_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    可以看出 \(y\) 与 \(x^2\)
的相关性要优于线性。所以下面尝试做带有二次项的拟合\cite{ref1}：

\[
Y = \beta_0 + \beta_1 X + \beta_2 X^2 + \epsilon, \quad \epsilon \sim N(0, \sigma^2)
\]

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{8}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{fm} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{lm}\PY{p}{(}\PY{n}{y} \PY{o}{\PYZti{}} \PY{n}{x} \PY{o}{+} \PY{n+nf}{I}\PY{p}{(}\PY{n}{x}\PY{o}{\PYZca{}}\PY{l+m}{2}\PY{p}{)}\PY{p}{,} \PY{n}{data}\PY{p}{)}
\PY{n+nf}{summary}\PY{p}{(}\PY{n}{fm}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

Call:
lm(formula = y \textasciitilde{} x + I(x\^{}2), data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.43449 -0.16752  0.05168  0.14782  0.33394 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   5.9524     0.1529  38.942  < 2e-16 ***
x            -1.8926     0.3535  -5.354 5.26e-05 ***
I(x\^{}2)        0.9478     0.1712   5.537 3.62e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2268 on 17 degrees of freedom
Multiple R-squared:  0.6433,	Adjusted R-squared:  0.6013 
F-statistic: 15.33 on 2 and 17 DF,  p-value: 0.0001565

    \end{Verbatim}

    
    得到回归方程：

\[
\hat Y = 5.9524 -1.8926 X + 0.9478 X^2 
\]

    这一次拟合效果有了显著提升，从检验可以看出，加入的二次项 \(X^2\) 对
\(Y\) 有显著影响。 模型 p 值小于 \(0.05\)，可以认为假设比较合理。

下面进一步做方差分析：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{9}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{summary}\PY{p}{(}\PY{n+nf}{aov}\PY{p}{(}\PY{n}{fm}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]
            Df Sum Sq Mean Sq F value   Pr(>F)    
x            1 0.0001  0.0001   0.001    0.973    
I(x\^{}2)       1 1.5771  1.5771  30.658 3.62e-05 ***
Residuals   17 0.8745  0.0514                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    \end{Verbatim}

    
    可以看到 \(X^2\) 对 \(Y\) 的影响显著，进一步证明了模型的合理性。

再作出各种残差图：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{10}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n+nf}{c}\PY{p}{(}\PY{l+m}{2}\PY{p}{,}\PY{l+m}{2}\PY{p}{)}\PY{p}{)}
\PY{n+nf}{plot}\PY{p}{(}\PY{n}{fm}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.8\linewidth}{0.8\paperheight}}{ex_2_5/output_23_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    \begin{itemize}
\tightlist
\item
  左下图：\textbf{标准化残差-拟合}：数据随机分布，无明显的形状特征，说明残差数据表现比较好。
\item
  右上图：\textbf{标准化残差
  Q-Q}：数据点按对角直线排列，直观上符合正态分布。
\end{itemize}

由这些图，可以认为相应的线性回归模型以及误差的独立正态分布的假设是合理的。\cite{ref1}

    进一步，对残差做 Shapiro-Wilk 正态检验：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{11}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{shapiro.test}\PY{p}{(}\PY{n+nf}{residuals}\PY{p}{(}\PY{n}{fm}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]
	Shapiro-Wilk normality test
data:  residuals(fm)
W = 0.95731, p-value = 0.4917
    \end{Verbatim}

    
    原假设 \(H_0\) 是残差服从正态分布，这里
\(p>0.05\)，不能拒绝原假设，可以认为残差服从正态分布。

    最后，拟合出的回归方程

\[
\hat Y = 5.9524 -1.8926 X + 0.9478 X^2
\]

与题目给出的真实模型

\[
\begin{aligned}
Y&=5+(X-1)^2\\
&=6-2X+X^2
\end{aligned}
\]

二者相比，形式一致，系数相差不大。拟合的效果比较好，模型选择正确。

    \hypertarget{section}{%
\subsection{(2)}\label{section}}

\begin{figure}
\centering
\includegraphics{ex_2_5/008i3skNly1gr5il00jvtj31mm04sq5e.jpg}
\caption{题（2）}
\end{figure}

    在 R 中实现 Box-Cox 变换，需要通过 \texttt{MASS} 包：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{12}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{library}\PY{p}{(}\PY{n}{MASS}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    变换过程需要分两步\cite{ref2}：

\begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\tightlist
\item
  第一步：拟合 Box-Cox 模型，得到 \texttt{lambda} 值
\end{enumerate}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{13}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{bc} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{boxcox}\PY{p}{(}\PY{n}{y} \PY{o}{\PYZti{}} \PY{n}{.}\PY{p}{,} \PY{n}{data}\PY{o}{=}\PY{n}{data}\PY{p}{)}  \PY{c+c1}{\PYZsh{} . 表示除因变量外的所有变量}
\PY{n+nf}{plot}\PY{p}{(}\PY{n}{bc}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    % \begin{center}
    % \adjustimage{max size={0.4\linewidth}{0.4\paperheight}}{ex_2_5/output_33_0.png}
    % \end{center}
    % % { \hspace*{\fill} \\}
    % 
    % \begin{center}
    % \adjustimage{max size={0.4\linewidth}{0.4\paperheight}}{ex_2_5/output_33_1.png}
    % \end{center}
    % % { \hspace*{\fill} \\}

\begin{center}
\begin{tabular}{rl}
    \adjustimage{max size={0.4\linewidth}{0.4\paperheight}}{ex_2_5/output_33_0.png}
    % { \hspace*{\fill} \\}
    &
    \adjustimage{max size={0.4\linewidth}{0.4\paperheight}}{ex_2_5/output_33_1.png}
    % { \hspace*{\fill} \\}
\end{tabular}
\end{center}

    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{14}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{idx} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{which}\PY{p}{(}\PY{n}{bc}\PY{o}{\PYZdl{}}\PY{n}{y}\PY{o}{==}\PY{n+nf}{max}\PY{p}{(}\PY{n}{bc}\PY{o}{\PYZdl{}}\PY{n}{y}\PY{p}{)}\PY{p}{)}
\PY{n}{lambda} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{bc}\PY{o}{\PYZdl{}}\PY{n}{x}\PY{p}{[}\PY{n}{idx}\PY{p}{]}
\PY{n}{lambda}
\end{Verbatim}
\end{tcolorbox}

    0.0202020202020203

    
    得到 \(\lambda=0.0202020202020203\)， 即
\(\lambda-\textrm{logLikelihood}\) 图中的最高点。

    \begin{enumerate}
\def\labelenumi{\arabic{enumi}.}
\setcounter{enumi}{1}
\tightlist
\item
  将上一步 Box-Cox 变换的 \(\lambda\) 值代入
\end{enumerate}

\[
Y^{(\lambda)} = \left\{\begin{aligned}
\frac{Y^\lambda-1}{\lambda}, && \lambda \ne 0 \\
\ln Y, && \lambda = 0
\end{aligned}\right.
\]

拟合变换后的变量关于 \(X\) 的简单线性回归模型：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{15}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{fmbc} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{lm}\PY{p}{(}\PY{p}{(}\PY{n}{y}\PY{o}{\PYZca{}}\PY{n}{lambda}\PY{l+m}{\PYZhy{}1}\PY{p}{)}\PY{o}{/}\PY{n}{lambda} \PY{o}{\PYZti{}} \PY{n}{x}\PY{p}{,} \PY{n}{data}\PY{p}{)}
\PY{n+nf}{summary}\PY{p}{(}\PY{n}{fmbc}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

Call:
lm(formula = (y\^{}lambda - 1)/lambda \textasciitilde{} x, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.14976 -0.03164 -0.01436  0.03356  0.13019 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 1.6978320  0.0320650  52.950   <2e-16 ***
x           0.0005291  0.0277778   0.019    0.985    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.07163 on 18 degrees of freedom
Multiple R-squared:  2.016e-05,	Adjusted R-squared:  -0.05553 
F-statistic: 0.0003628 on 1 and 18 DF,  p-value: 0.985

    \end{Verbatim}

    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{16}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{summary}\PY{p}{(}\PY{n+nf}{aov}\PY{p}{(}\PY{n}{fmbc}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]
            Df  Sum Sq  Mean Sq F value Pr(>F)
x            1 0.00000 0.000002       0  0.985
Residuals   18 0.09236 0.005131               
    \end{Verbatim}

    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{17}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n+nf}{c}\PY{p}{(}\PY{l+m}{2}\PY{p}{,}\PY{l+m}{2}\PY{p}{)}\PY{p}{)}
\PY{n+nf}{plot}\PY{p}{(}\PY{n}{fmbc}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.8\linewidth}{0.8\paperheight}}{ex_2_5/output_39_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{18}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{shapiro.test}\PY{p}{(}\PY{n+nf}{residuals}\PY{p}{(}\PY{n}{fmbc}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]
	Shapiro-Wilk normality test
data:  residuals(fmbc)
W = 0.94268, p-value = 0.2693
    \end{Verbatim}

    
    从上面拟合结果的回归分析、方差分析、残差图、残差正态性检验结果可以看出，Box-Cox
变换没有带来明显的改进。

做 Box-Cox 变换的意义在于使数据满足线性模型的正态性假设，而由前面第 (1)
题的数据拟合结果，已经满足正态性，这里再作 Box-Cox 变换没有意义。\cite{ref3,ref4}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{19}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{detach}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% - MARK: 2.6
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%    



    
    \hypertarget{ux4e60ux9898-2.6}{%
\section{习题 2.6}\label{ux4e60ux9898-2.6}}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{1}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{data} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{read.csv}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{./ex\PYZus{}2\PYZus{}6.csv\PYZdq{}}\PY{p}{)}\PY{p}{;} \PY{n}{data}
\end{Verbatim}
\end{tcolorbox}

    A data.frame: 31 × 3
\begin{tabular}{lll}
 直径.x1. & 高度.x2. & 体积.y.\\
 <dbl> & <int> & <dbl>\\
\hline
	  8.3 & 70 & 10.3\\
	  8.6 & 65 & 10.3\\
	  8.8 & 63 & 10.2\\
     \vdots & \vdots & \vdots \\
	 18.0 & 80 & 51.5\\
	 18.0 & 80 & 51.0\\
	 20.6 & 87 & 77.0\\
\end{tabular}


    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{2}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{names}\PY{p}{(}\PY{n}{data}\PY{p}{)} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{c}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{x1\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{x2\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{y\PYZdq{}}\PY{p}{)}
\PY{n+nf}{attach}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    在开始之前，先看一看数据的特征及相关关系：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{3}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{library}\PY{p}{(}\PY{n}{psych}\PY{p}{)}
\PY{n+nf}{pairs.panels}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_2_6/output_4_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    可以看到 \(x_1\)、\(y\) 有很强的线性相关性； \(x_2\) 对 \(y\)
的相关性略差，但也足够高。

    \hypertarget{section}{%
\subsection{(1)}\label{section}}

\begin{figure}
\centering
\includegraphics{ex_2_6/008i3skNly1gr61tiqfrwj31fw040abk.jpg}
\caption{题（1）}
\end{figure}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{4}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{linear\PYZus{}model} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{lm}\PY{p}{(}\PY{n}{y} \PY{o}{\PYZti{}} \PY{n}{x1} \PY{o}{+} \PY{n}{x2}\PY{p}{,} \PY{n}{data}\PY{o}{=}\PY{n}{data}\PY{p}{)}
\PY{n+nf}{summary}\PY{p}{(}\PY{n}{linear\PYZus{}model}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

Call:
lm(formula = y \textasciitilde{} x1 + x2, data = data)

Residuals:
    Min      1Q  Median      3Q     Max 
-6.4065 -2.6493 -0.2876  2.2003  8.4847 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -57.9877     8.6382  -6.713 2.75e-07 ***
x1            4.7082     0.2643  17.816  < 2e-16 ***
x2            0.3393     0.1302   2.607   0.0145 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 3.882 on 28 degrees of freedom
Multiple R-squared:  0.948,	Adjusted R-squared:  0.9442 
F-statistic:   255 on 2 and 28 DF,  p-value: < 2.2e-16

    \end{Verbatim}

    
    分析结果中模型 \(p<0.05\)，假设合理，\(R^2>0.94\) 拟合效果较好。
但注意到相比于 \(x_1\)， \(x_2\) 项对 \(y\)
没有非常显著的影响（\(0.01<p<0.05\)）。

用 aov 作出方差分析表：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{5}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{summary}\PY{p}{(}\PY{n+nf}{aov}\PY{p}{(}\PY{n}{linear\PYZus{}model}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]
            Df Sum Sq Mean Sq F value Pr(>F)    
x1           1   7582    7582 503.150 <2e-16 ***
x2           1    102     102   6.794 0.0145 *  
Residuals   28    422      15                   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    \end{Verbatim}

    
    下面作出各种残差诊断图：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{6}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n+nf}{c}\PY{p}{(}\PY{l+m}{2}\PY{p}{,}\PY{l+m}{2}\PY{p}{)}\PY{p}{)}
\PY{n+nf}{plot}\PY{p}{(}\PY{n}{linear\PYZus{}model}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.8\linewidth}{0.8\paperheight}}{ex_2_6/output_11_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    从残差拟合图中可以看出，学生化残差明显不在同一条直线上，考虑对 \(Y\) 作
Box-Cox 变换。

    \hypertarget{section}{%
\subsection{(2)}\label{section}}

\begin{figure}
\centering
\includegraphics{ex_2_6/008i3skNly1gr6a81zi2cj31eq04840k.jpg}
\caption{题（2）}
\end{figure}

    下面对数据进行 Box-Cox 变换后重新拟合：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{23}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{library}\PY{p}{(}\PY{n}{MASS}\PY{p}{)}
\PY{n}{bc} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{boxcox}\PY{p}{(}\PY{n}{y} \PY{o}{\PYZti{}} \PY{n}{.}\PY{p}{,} \PY{n}{data}\PY{o}{=}\PY{n}{data}\PY{p}{)}
\PY{n}{idx} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{which}\PY{p}{(}\PY{n}{bc}\PY{o}{\PYZdl{}}\PY{n}{y}\PY{o}{==}\PY{n+nf}{max}\PY{p}{(}\PY{n}{bc}\PY{o}{\PYZdl{}}\PY{n}{y}\PY{p}{)}\PY{p}{)}
\PY{n}{lambda} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{bc}\PY{o}{\PYZdl{}}\PY{n}{x}\PY{p}{[}\PY{n}{idx}\PY{p}{]}
\PY{n}{lambda}
\end{Verbatim}
\end{tcolorbox}

    0.303030303030303

    
    % A removed image
    % \begin{center}
    % \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_2_6/output_15_1.png}
    % \end{center}
    % % { \hspace*{\fill} \\}
    
    得到了 \(\lambda = 0.303030303030303\)，下面代入变换，重新拟合：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{24}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{fmbc} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{lm}\PY{p}{(}\PY{p}{(}\PY{n}{y}\PY{o}{\PYZca{}}\PY{n}{lambda}\PY{l+m}{\PYZhy{}1}\PY{p}{)}\PY{o}{/}\PY{n}{lambda} \PY{o}{\PYZti{}} \PY{n}{.}\PY{p}{,} \PY{n}{data}\PY{p}{)}
\PY{n+nf}{summary}\PY{p}{(}\PY{n}{fmbc}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

Call:
lm(formula = (y\^{}lambda - 1)/lambda \textasciitilde{} ., data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.42600 -0.14274 -0.01468  0.18705  0.36851 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -2.733542   0.500080  -5.466 7.77e-06 ***
x1           0.409448   0.015299  26.764  < 2e-16 ***
x2           0.039685   0.007535   5.267 1.34e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2247 on 28 degrees of freedom
Multiple R-squared:  0.9775,	Adjusted R-squared:  0.9759 
F-statistic: 609.6 on 2 and 28 DF,  p-value: < 2.2e-16

    \end{Verbatim}

    
    拟合效果有所提升，

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{26}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{summary}\PY{p}{(}\PY{n+nf}{aov}\PY{p}{(}\PY{n}{fmbc}\PY{p}{)}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]
            Df Sum Sq Mean Sq F value   Pr(>F)    
x1           1  60.17   60.17 1191.45  < 2e-16 ***
x2           1   1.40    1.40   27.74 1.34e-05 ***
Residuals   28   1.41    0.05                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    \end{Verbatim}

    
    检验 \(p<0.001\)，\(x_1\)、\(x_2\) 对变换后的 \(y\)
有显著影响，认为变换后的 \(Y\) 与 \(X_1\)、\(X_2\)
之间的线性关系较为合理。

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{25}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n+nf}{c}\PY{p}{(}\PY{l+m}{2}\PY{p}{,}\PY{l+m}{2}\PY{p}{)}\PY{p}{)}
\PY{n+nf}{plot}\PY{p}{(}\PY{n}{fmbc}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.8\linewidth}{0.8\paperheight}}{ex_2_6/output_21_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    从图中可以看出，无论是学生化残差的正态 Q-Q
图还是变换后因变量的拟合值都有明显的改观。

还可以进一步作正态性检验：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{19}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{ks.test}\PY{p}{(}\PY{n+nf}{residuals}\PY{p}{(}\PY{n}{fmbc}\PY{p}{)}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{pnorm\PYZdq{}}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

	One-sample Kolmogorov-Smirnov test

data:  residuals(fmbc)
D = 0.35625, p-value = 0.0005008
alternative hypothesis: two-sided

    \end{Verbatim}

    
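    需要注意，这里的 \texttt{ks.test} 是把残差与标准正态分布 \(N(0,1)\) 作比较，而残差的标准误差约为 0.22，因此很小的 p 值主要反映尺度上的差异，并不能直接否定正态性（严格的检验应先将残差标准化）。结合前面的 Q-Q 图，认为残差近似满足正态性，Box-Cox 变换效果显著。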

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{29}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{detach}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% - MARK: 3.4
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%    


    
    \hypertarget{ux4e60ux9898-3.4}{%
\section{习题 3.4}\label{ux4e60ux9898-3.4}}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{1}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{raw\PYZus{}data} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{read.csv}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{./ex\PYZus{}3\PYZus{}4.csv\PYZdq{}}\PY{p}{); }\PY{n}{raw\PYZus{}data}
\end{Verbatim}
\end{tcolorbox}

% \resizebox{\textwidth}{15mm}{
\begin{center}
\begin{tabular}{lllllll}
 催化剂 & & 产 & 品 & 得 & 率 & \\
  & X.1 & X.2 & X.3 & X.4 & X.5 & X.6\\
 <chr> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl>\\
\hline
	 A1 & 0.88 & 0.85 & 0.79 & 0.86 & 0.85 & 0.83\\
	 A2 & 0.87 & 0.92 & 0.85 & 0.83 & 0.90 & 0.80\\
	 A3 & 0.84 & 0.78 & 0.81 & 0.80 & 0.85 & 0.83\\
	 A4 & 0.81 & 0.86 & 0.90 & 0.87 & 0.78 & 0.79\\
\end{tabular}
\end{center}
% }


    
    为方便后续分析，先将数据处理成 \(\langle A, x \rangle\) 的序对形式：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{2}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{A} \PY{o}{=} \PY{n+nf}{c}\PY{p}{(}\PY{p}{)}
\PY{n}{x} \PY{o}{=} \PY{n+nf}{c}\PY{p}{(}\PY{p}{)}
\PY{n+nf}{for }\PY{p}{(}\PY{n}{row} \PY{n}{in} \PY{l+m}{1}\PY{o}{:}\PY{n+nf}{nrow}\PY{p}{(}\PY{n}{raw\PYZus{}data}\PY{p}{)}\PY{p}{)} \PY{p}{\PYZob{}}
    \PY{n+nf}{for }\PY{p}{(}\PY{n}{col} \PY{n}{in} \PY{l+m}{2}\PY{o}{:}\PY{n+nf}{ncol}\PY{p}{(}\PY{n}{raw\PYZus{}data}\PY{p}{)}\PY{p}{)} \PY{p}{\PYZob{}}
        \PY{n}{A} \PY{o}{=} \PY{n+nf}{c}\PY{p}{(}\PY{n}{A}\PY{p}{,} \PY{n}{raw\PYZus{}data}\PY{p}{[}\PY{n}{row}\PY{p}{,} \PY{l+m}{1}\PY{p}{]}\PY{p}{)}
        \PY{n}{x} \PY{o}{=} \PY{n+nf}{c}\PY{p}{(}\PY{n}{x}\PY{p}{,} \PY{n}{raw\PYZus{}data}\PY{p}{[}\PY{n}{row}\PY{p}{,} \PY{n}{col}\PY{p}{]}\PY{p}{)}
    \PY{p}{\PYZcb{}}
\PY{p}{\PYZcb{}}
\PY{n}{data} \PY{o}{=} \PY{n+nf}{data.frame}\PY{p}{(}\PY{n}{A}\PY{p}{,} \PY{n}{x}\PY{p}{)}
\PY{n}{data}
\end{Verbatim}
\end{tcolorbox}

    A data.frame: 24 × 2
\begin{tabular}{ll}
 A & x\\
 <chr> & <dbl>\\
\hline
	 A1 & 0.88\\
	 A1 & 0.85\\
	 A1 & 0.79\\
	 \vdots & \vdots \\
	 A4 & 0.87\\
	 A4 & 0.78\\
	 A4 & 0.79\\
\end{tabular}


    
    \hypertarget{ux6b63ux6001ux6027ux68c0ux9a8c}{%
\subsection{正态性检验}\label{ux6b63ux6001ux6027ux68c0ux9a8c}}

首先，需要对数据进行正态性检验：

用 \texttt{shapiro.test} 检验 4 组数据是否服从正态分布：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{3}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{shapiro.test}\PY{p}{(}\PY{n}{x}\PY{p}{[}\PY{l+m}{1}\PY{o}{:}\PY{l+m}{6}\PY{p}{]}\PY{p}{)}
\PY{n+nf}{shapiro.test}\PY{p}{(}\PY{n}{x}\PY{p}{[}\PY{l+m}{7}\PY{o}{:}\PY{l+m}{12}\PY{p}{]}\PY{p}{)}
\PY{n+nf}{shapiro.test}\PY{p}{(}\PY{n}{x}\PY{p}{[}\PY{l+m}{13}\PY{o}{:}\PY{l+m}{18}\PY{p}{]}\PY{p}{)}
\PY{n+nf}{shapiro.test}\PY{p}{(}\PY{n}{x}\PY{p}{[}\PY{l+m}{19}\PY{o}{:}\PY{l+m}{24} \PY{p}{]}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]
	Shapiro-Wilk normality test
data:  x[1:6]
W = 0.92835, p-value = 0.5674
    \end{Verbatim}

    
    
    \begin{Verbatim}[commandchars=\\\{\}]
	Shapiro-Wilk normality test
data:  x[7:12]
W = 0.9831, p-value = 0.9659
    \end{Verbatim}

    
    
    \begin{Verbatim}[commandchars=\\\{\}]
	Shapiro-Wilk normality test
data:  x[13:18]
W = 0.96579, p-value = 0.8631
    \end{Verbatim}

    
    
    \begin{Verbatim}[commandchars=\\\{\}]
	Shapiro-Wilk normality test
data:  x[19:24]
W = 0.92097, p-value = 0.5124
    \end{Verbatim}

    
    A1、A2、A3、A4 四个组的 shapiro.test 检验 p 值依次为
0.5674、0.9659、0.8631、0.5124，均大于 0.05，
认为原假设成立（\(H_0\)：假设数据服从正态分布），表明 4
组数据均来自正态分布总体。

    \hypertarget{ux65b9ux5deeux9f50ux6b21ux68c0ux9a8c}{%
\subsection{方差齐性检验}\label{ux65b9ux5deeux9f50ux6b21ux68c0ux9a8c}}

接下来进行方差齐性检验：

用 \texttt{bartlett.test} 检验 4 个分组数据方差是否一致：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{4}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{bartlett.test}\PY{p}{(}\PY{n}{x} \PY{o}{\PYZti{}} \PY{n}{A}\PY{p}{,} \PY{n}{data} \PY{o}{=} \PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

	Bartlett test of homogeneity of variances

data:  x by A
Bartlett's K-squared = 2.2257, df = 3, p-value = 0.5269

    \end{Verbatim}

    
    得到 p-value = 0.5269 \textgreater{} 0.05，认为原假设成立（\(H_0\)：假设
4 组数据方差相等），表明 4 组数据满足方差齐性。

    通过了上面两个检验，下面就可以开始做单因素方差分析了。

\hypertarget{ux56feux5f62ux53efux89c6ux5316}{%
\subsection{图形可视化}\label{ux56feux5f62ux53efux89c6ux5316}}

开始之前，还可以考虑先用箱图观察一下几组数据的分布：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{5}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{boxplot}\PY{p}{(}\PY{n}{x} \PY{o}{\PYZti{}} \PY{n}{A}\PY{p}{,} \PY{n}{data} \PY{o}{=} \PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_3_4/output_11_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    \hypertarget{ux5355ux56e0ux7d20ux65b9ux5deeux5206ux6790}{%
\subsection{单因素方差分析}\label{ux5355ux56e0ux7d20ux65b9ux5deeux5206ux6790}}

    使用 aov 函数完成方差分析：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{6}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{dfc} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{aov}\PY{p}{(}\PY{n}{x}\PY{o}{\PYZti{}}\PY{n}{A}\PY{p}{,} \PY{n}{data}\PY{o}{=}\PY{n}{data}\PY{p}{)}
\PY{n+nf}{summary}\PY{p}{(}\PY{n}{dfc}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]
            Df   Sum Sq  Mean Sq F value Pr(>F)
A            3 0.005846 0.001949   1.306    0.3
Residuals   20 0.029850 0.001492               
    \end{Verbatim}

    
    从方差分析表中看到 p-value = 0.3 \textgreater{} 0.05，接受原假设
\(H_0\)，认为四种不同催化剂对产品的得率无显著影响\cite{ref5}。


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% - MARK: 3.5
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%    


    
    \hypertarget{ux4e60ux9898-3.5}{%
\section{习题 3.5}\label{ux4e60ux9898-3.5}}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{1}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{raw\PYZus{}data} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{read.csv}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{./ex\PYZus{}3\PYZus{}5.csv\PYZdq{}}\PY{p}{); }\PY{n}{raw\PYZus{}data}
\end{Verbatim}
\end{tcolorbox}

\resizebox{\textwidth}{15mm}{
\begin{tabular}{lllllllllllll}
科研经费投入&&&生&产&能&力&提&高&量&&&\\
  & x.0 & x.1 & x.2 & x.3 & x.4 & x.5 & x.6 & x.7 & x.8 & x.9 & x.10 & x.11\\
 <chr> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <dbl> & <chr> & <chr> & <chr> & <chr> & <chr> & <dbl>\\
\hline
	 低 & 7.6 & 8.2 &  6.8 & 5.8 & 6.9 & 6.6 & 6.3 & 7.7 & 6   &     &     &  NA\\
	 中 & 6.7 & 8.1 &  9.4 & 8.6 & 7.8 & 7.7 & 8.9 & 7.9 & 8.3 & 8.7 & 7.1 & 8.4\\
	 高 & 8.5 & 9.7 & 10.1 & 7.8 & 9.6 & 9.5 &     &     &     &     &     &  NA\\
\end{tabular}}

首先还是把数据处理成两列：
    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{2}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{t\PYZus{}raw\PYZus{}data} \PY{o}{=} \PY{n+nf}{t}\PY{p}{(}\PY{n}{raw\PYZus{}data}\PY{p}{)}  \PY{c+c1}{\PYZsh{} 转置，方便取数据}
\PY{n}{dict} \PY{o}{=} \PY{n+nf}{c}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{low\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{mid\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{high\PYZdq{}}\PY{p}{)}
\PY{n+nf}{names}\PY{p}{(}\PY{n}{dict}\PY{p}{)} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{c}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{低\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{中\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{高\PYZdq{}}\PY{p}{)}
\PY{n}{investment} \PY{o}{=} \PY{n+nf}{c}\PY{p}{(}\PY{p}{)}   \PY{c+c1}{\PYZsh{} 科研经费投入}
\PY{n}{improvement} \PY{o}{=} \PY{n+nf}{c}\PY{p}{(}\PY{p}{)}  \PY{c+c1}{\PYZsh{} 生产能力提高量}
\PY{n+nf}{for }\PY{p}{(}\PY{n}{i} \PY{n}{in} \PY{l+m}{1}\PY{o}{:}\PY{l+m}{3}\PY{p}{)} \PY{p}{\PYZob{}}
    \PY{n}{b} \PY{o}{=} \PY{n}{dict}\PY{p}{[[}\PY{n}{t\PYZus{}raw\PYZus{}data}\PY{p}{[[}\PY{l+m}{1}\PY{p}{,}\PY{n}{i}\PY{p}{]]}\PY{p}{]]}   \PY{c+c1}{\PYZsh{} 科研经费投入}
    \PY{n}{a} \PY{o}{=} \PY{n+nf}{array}\PY{p}{(}\PY{n}{t\PYZus{}raw\PYZus{}data}\PY{p}{[}\PY{l+m}{\PYZhy{}1}\PY{p}{,}\PY{n}{i}\PY{p}{]}\PY{p}{)}   \PY{c+c1}{\PYZsh{} 生产能力提高量：下面 2 行将数据转化为 double 型，并清除 NA、空值}
    \PY{n}{a} \PY{o}{=} \PY{n+nf}{apply}\PY{p}{(}\PY{n}{a}\PY{p}{[}\PY{o}{!}\PY{n+nf}{is.na}\PY{p}{(}\PY{n}{a}\PY{p}{)}\PY{p}{]}\PY{p}{,} \PY{l+m}{1}\PY{p}{,} \PY{n}{as.double}\PY{p}{)}  \PY{c+c1}{\PYZsh{} 这里会有一些产生 NA 的 Warning，这个问题来自：as.double(\PYZdq{}\PYZdq{})，不必在意}
    \PY{n}{a} \PY{o}{=} \PY{n}{a}\PY{p}{[}\PY{o}{!}\PY{n+nf}{is.na}\PY{p}{(}\PY{n}{a}\PY{p}{)}\PY{p}{]}   
    \PY{n+nf}{for }\PY{p}{(}\PY{n}{p} \PY{n}{in} \PY{n}{a}\PY{p}{)} \PY{p}{\PYZob{}}
        \PY{n}{investment} \PY{o}{=} \PY{n+nf}{c}\PY{p}{(}\PY{n}{investment}\PY{p}{,} \PY{n}{b}\PY{p}{)}
        \PY{n}{improvement} \PY{o}{=} \PY{n+nf}{c}\PY{p}{(}\PY{n}{improvement}\PY{p}{,} \PY{n}{p}\PY{p}{)}
    \PY{p}{\PYZcb{}}
\PY{p}{\PYZcb{}}
\PY{n}{data} \PY{o}{=} \PY{n+nf}{data.frame}\PY{p}{(}\PY{n}{investment}\PY{p}{,} \PY{n}{improvement}\PY{p}{);}\PY{n}{data}
\end{Verbatim}
\end{tcolorbox}

    \begin{Verbatim}[commandchars=\\\{\}]
Warning message in apply(a[!is.na(a)], 1, as.double):
“强制改变过程中产生了NA”
    \end{Verbatim}

    A data.frame: 27 × 2
\begin{tabular}{ll}
 investment & improvement\\
 <chr> & <dbl>\\
\hline
	 low  &  7.6\\
	 low  &  8.2\\
	 low  &  6.8\\
     \vdots & \vdots \\
	 high &  7.8\\
	 high &  9.6\\
	 high &  9.5\\
\end{tabular}


    
    \hypertarget{ex-3-5-1}{%
\subsection{(1)}\label{ex-3-5-1}}

\begin{figure}
\centering
\includegraphics{ex_3_5/008i3skNly1gr76e2inkyj31eg03y0ua.jpg}
\caption{题（1）}
\end{figure}

    首先，通过箱线图，直观感觉是有显著差异的：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{3}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{boxplot}\PY{p}{(}\PY{n}{improvement} \PY{o}{\PYZti{}} \PY{n}{investment}\PY{p}{,} \PY{n}{data} \PY{o}{=} \PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.5\linewidth}{0.5\paperheight}}{ex_3_5/output_5_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    接下来建立方差分析表：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{4}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{dfc} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{aov}\PY{p}{(}\PY{n}{improvement} \PY{o}{\PYZti{}} \PY{n}{investment}\PY{p}{,} \PY{n}{data}\PY{o}{=}\PY{n}{data}\PY{p}{)}
\PY{n+nf}{summary}\PY{p}{(}\PY{n}{dfc}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]
            Df Sum Sq Mean Sq F value   Pr(>F)    
investment   2  20.12   10.06   15.72 4.33e-05 ***
Residuals   24  15.36    0.64                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    \end{Verbatim}

    
    检验 \(p < 0.05\)，拒绝原假设，认为在显著水平 \(\alpha=0.05\)
下过去三年科研经费投入的不同对当年生产力的提高有显著影响。

    \hypertarget{ex-3-5-2}{%
\subsection{(2)}\label{ex-3-5-2}}

\begin{figure}
\centering
\includegraphics{ex_3_5/008i3skNly1gr771206vtj61fo08an1e02.jpg}
\caption{题（2）}
\end{figure}

    首先为了方便，提取出几个组：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{5}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{group\PYZus{}low}  \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{data}\PY{p}{[}\PY{n}{investment}\PY{o}{==}\PY{l+s}{\PYZdq{}}\PY{l+s}{low\PYZdq{}}\PY{p}{,}\PY{p}{]}
\PY{n}{group\PYZus{}mid}  \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{data}\PY{p}{[}\PY{n}{investment}\PY{o}{==}\PY{l+s}{\PYZdq{}}\PY{l+s}{mid\PYZdq{}}\PY{p}{,}\PY{p}{]}
\PY{n}{group\PYZus{}high} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{data}\PY{p}{[}\PY{n}{investment}\PY{o}{==}\PY{l+s}{\PYZdq{}}\PY{l+s}{high\PYZdq{}}\PY{p}{,}\PY{p}{]}
\end{Verbatim}
\end{tcolorbox}

    利用 \texttt{t.test} 容易获取到均值的置信区间\cite{ref6}：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{6}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{t.test}\PY{p}{(}\PY{n}{group\PYZus{}low}\PY{o}{\PYZdl{}}\PY{n}{improvement}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

	One Sample t-test

data:  group\_low\$improvement
t = 25.361, df = 8, p-value = 6.261e-09
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
 6.252390 7.503166
sample estimates:
mean of x 
 6.877778 

    \end{Verbatim}

    
    其中 \texttt{mean\ of\ x} 为均值，\texttt{confidence\ interval}
即置信区间。

为了方便，封装如下函数，用来提取这些信息：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{7}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{mean\PYZus{}confin} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{function}\PY{p}{(}\PY{n}{x}\PY{p}{,} \PY{k+kc}{...}\PY{p}{)} \PY{p}{\PYZob{}}    \PY{c+c1}{\PYZsh{} mean and confidence interval of x by t.test}
    \PY{n}{t.res} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{t.test}\PY{p}{(}\PY{n}{x}\PY{p}{,} \PY{k+kc}{...}\PY{p}{)}
    \PY{n}{mean\PYZus{}val} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{t.res}\PY{o}{\PYZdl{}}\PY{n}{estimate}\PY{p}{[[}\PY{l+s}{\PYZdq{}}\PY{l+s}{mean of x\PYZdq{}}\PY{p}{]]}
    \PY{n}{mean\PYZus{}conf.in} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n}{t.res}\PY{o}{\PYZdl{}}\PY{n}{conf.in}
    \PY{n}{res} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{c}\PY{p}{(}\PY{n}{mean\PYZus{}val}\PY{p}{,} \PY{n}{mean\PYZus{}conf.in}\PY{p}{)}
    \PY{n+nf}{names}\PY{p}{(}\PY{n}{res}\PY{p}{)} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{c}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{mean\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{conf.left\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{conf.right\PYZdq{}}\PY{p}{)}
    \PY{n}{res}  \PY{c+c1}{\PYZsh{} ret}
\PY{p}{\PYZcb{}}
\end{Verbatim}
\end{tcolorbox}

    调用上面封装的函数可以很方便地得到一个表格：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{8}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{mu} \PY{o}{\PYZlt{}\PYZhy{}} \PY{p}{(}\PY{n+nf}{function}\PY{p}{(}\PY{p}{)} \PY{p}{\PYZob{}}
    \PY{n}{L} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{mean\PYZus{}confin}\PY{p}{(}\PY{n}{group\PYZus{}low}\PY{o}{\PYZdl{}}\PY{n}{improvement}\PY{p}{)}
    \PY{n}{M} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{mean\PYZus{}confin}\PY{p}{(}\PY{n}{group\PYZus{}mid}\PY{o}{\PYZdl{}}\PY{n}{improvement}\PY{p}{)}
    \PY{n}{H} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{mean\PYZus{}confin}\PY{p}{(}\PY{n}{group\PYZus{}high}\PY{o}{\PYZdl{}}\PY{n}{improvement}\PY{p}{)}
    \PY{n+nf}{rbind}\PY{p}{(}\PY{n}{L}\PY{p}{,} \PY{n}{M}\PY{p}{,} \PY{n}{H}\PY{p}{)}
\PY{p}{\PYZcb{}}\PY{p}{)}\PY{p}{(}\PY{p}{)}
\PY{n}{mu}
\end{Verbatim}
\end{tcolorbox}

    A matrix: 3 × 3 of type dbl
\begin{tabular}{r|lll}
  & mean & conf.left & conf.right\\
\hline
	L & 6.877778 & 6.252390 &  7.503166\\
	M & 8.133333 & 7.652239 &  8.614427\\
	H & 9.200000 & 8.289951 & 10.110049\\
\end{tabular}


    
    得到三年经费投入为低、中、高情况下当年生产能力提高量的均值为：

\[
\begin{aligned}
\mu_L & =  6.877778 \\
\mu_M & =  8.133333 \\
\mu_H & =  9.2 \\
\end{aligned}
\]

各 95\% 置信区间如下：

\[
\begin{aligned}
\mu_L & \in  (6.252390, 7.503166) \\
\mu_M & \in (7.652239, 8.614427) \\
\mu_H & \in (8.289951, 10.110049) \\
\end{aligned}
\]

    当然，这里也可以手动实现计算过程，但比较麻烦：

\scriptsize
\begin{lstlisting}[numbers=left, numberstyle=\tiny]
# 分类汇总：means of improvements
grouped_means <- aggregate(improvement, by=list(investment), FUN=mean)
# 从方差分析表中提取出 MSE
mse <- anova(dfc)["Residuals", "Mean Sq"]
# 带入公式，用 qt 计算 t 分位数，得到置信区间
...
\end{lstlisting}
\normalsize

    要求 Bonferroni 同时置信区间，R
好像没有内置的实现，同样手写麻烦，所以考虑调用第三方包\cite{ref7}：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{9}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c+c1}{\PYZsh{} install.packages(\PYZdq{}DescTools\PYZdq{})}
\PY{n+nf}{require}\PY{p}{(}\PY{n}{DescTools}\PY{p}{)}
\PY{n+nf}{PostHocTest}\PY{p}{(}\PY{n}{dfc}\PY{p}{,} \PY{n}{method} \PY{o}{=} \PY{l+s}{\PYZdq{}}\PY{l+s}{bonferroni\PYZdq{}}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{Verbatim}[commandchars=\\\{\}]
Loading required package: DescTools

    \end{Verbatim}

    
    \begin{Verbatim}[commandchars=\\\{\}]

  Posthoc multiple comparisons of means : Bonferroni 
    95\% family-wise confidence level

\$investment
              diff     lwr.ci      upr.ci    pval    
low-high -2.322222 -3.4074430 -1.23700140 3.5e-05 ***
mid-high -1.066667 -2.0961975 -0.03713579  0.0405 *  
mid-low   1.255556  0.3475947  2.16351644  0.0048 ** 

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

    \end{Verbatim}

    
    得到 \(\mu_L-\mu_M\)，\(\mu_L-\mu_H\) 和 \(\mu_M-\mu_H\) 的置信度不小于
95\% 的 Bonferroni 同时置信区间：

\[
\begin{aligned}
\mu_L-\mu_M & \in (-2.16351644, -0.3475947  ) \\
\mu_L-\mu_H & \in (-3.4074430, -1.23700140) \\
\mu_M-\mu_H & \in (-2.0961975, -0.03713579) \\
\end{aligned}
\]

    从 \(\mu_L-\mu_M\)，\(\mu_L-\mu_H\) 和 \(\mu_M-\mu_H\) 的 Bonferroni
同时置信区间都位于负值区间可知，
过去三年科研经费投入越高，当年生产能力的提高量越大。



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% - MARK: 3.6
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%    

   
    \hypertarget{ux4e60ux9898-3.6}{%
\section{习题 3.6}\label{ux4e60ux9898-3.6}}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{1}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{data} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{read.table}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{./ex\PYZus{}3\PYZus{}6.meaningfulize.txt\PYZdq{}}\PY{p}{,} \PY{n}{header}\PY{o}{=}\PY{k+kc}{TRUE}\PY{p}{); }\PY{n}{data}
\end{Verbatim}
\end{tcolorbox}

    A data.frame: 108 × 3
\begin{tabular}{lll}
 FeIon & Dose & Retention\\
 <chr> & <chr> & <dbl>\\
\hline
	 Fe3 & high & 0.71\\
	 Fe3 & high & 1.66\\
	 Fe3 & high & 2.01\\
	 \vdots & \vdots & \vdots\\
	 Fe2 & low & 19.87\\
	 Fe2 & low & 21.60\\
	 Fe2 & low & 22.25\\
\end{tabular}


    
    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{2}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{attach}\PY{p}{(}\PY{n}{data}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \hypertarget{ex-3-6-1}{%
\subsection{(1)}\label{ex-3-6-1}}

\begin{figure}
\centering
\includegraphics{ex_3_6/008i3skNly1gr7f8iaibgj31lo06y41x.jpg}
\caption{题（1）}
\end{figure}

    首先，求出各组合观测值的样本均值、标准差。这里可以利用 aggregate
做分类汇总：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{3}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{grouped\PYZus{}means} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{aggregate}\PY{p}{(}\PY{n}{Retention}\PY{p}{,} \PY{n}{by}\PY{o}{=}\PY{n+nf}{list}\PY{p}{(}\PY{n}{FeIon}\PY{p}{,} \PY{n}{Dose}\PY{p}{)}\PY{p}{,} \PY{n}{FUN}\PY{o}{=}\PY{n}{mean}\PY{p}{)}
\PY{n}{grouped\PYZus{}sds}  \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{aggregate}\PY{p}{(}\PY{n}{Retention}\PY{p}{,} \PY{n}{by}\PY{o}{=}\PY{n+nf}{list}\PY{p}{(}\PY{n}{FeIon}\PY{p}{,} \PY{n}{Dose}\PY{p}{)}\PY{p}{,} \PY{n}{FUN}\PY{o}{=}\PY{n}{sd}\PY{p}{)}
\PY{c+c1}{\PYZsh{} 下面几行代码将结果整合到一个表格，方便查看：}
\PY{n}{grouped\PYZus{}means\PYZus{}sds} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{cbind}\PY{p}{(}\PY{n}{grouped\PYZus{}means}\PY{p}{,} \PY{n}{grouped\PYZus{}sds}\PY{p}{[}\PY{l+s}{\PYZdq{}}\PY{l+s}{x\PYZdq{}}\PY{p}{]}\PY{p}{)}
\PY{n+nf}{names}\PY{p}{(}\PY{n}{grouped\PYZus{}means\PYZus{}sds}\PY{p}{)} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{c}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{FeIon\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{Dose\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{mean\PYZdq{}}\PY{p}{,} \PY{l+s}{\PYZdq{}}\PY{l+s}{sd\PYZdq{}}\PY{p}{)}
\PY{n}{grouped\PYZus{}means\PYZus{}sds}
\end{Verbatim}
\end{tcolorbox}

    A data.frame: 6 × 4
\begin{tabular}{llll}
 FeIon & Dose & mean & sd\\
 <chr> & <chr> & <dbl> & <dbl>\\
\hline
	 Fe2 & high &  5.936667 & 2.806778\\
	 Fe3 & high &  3.698889 & 2.030870\\
	 Fe2 & low  & 12.639444 & 6.082089\\
	 Fe3 & low  & 11.750000 & 7.028150\\
	 Fe2 & mid  &  9.632222 & 6.691215\\
	 Fe3 & mid  &  8.203889 & 5.447386\\
\end{tabular}


    
    为方便观察，可以画出箱线图来比较：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{6}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n+nf}{par}\PY{p}{(}\PY{n}{mfrow}\PY{o}{=}\PY{n+nf}{c}\PY{p}{(}\PY{l+m}{2}\PY{p}{,}\PY{l+m}{2}\PY{p}{)}\PY{p}{)}
\PY{n+nf}{boxplot}\PY{p}{(}\PY{n}{`mean`} \PY{o}{\PYZti{}} \PY{n}{`FeIon`}\PY{p}{,} \PY{n}{data}\PY{o}{=}\PY{n}{grouped\PYZus{}means\PYZus{}sds}\PY{p}{)}
\PY{n+nf}{boxplot}\PY{p}{(}\PY{n}{`sd`} \PY{o}{\PYZti{}} \PY{n}{`FeIon`}\PY{p}{,} \PY{n}{data}\PY{o}{=}\PY{n}{grouped\PYZus{}means\PYZus{}sds}\PY{p}{)}
\PY{n+nf}{boxplot}\PY{p}{(}\PY{n}{`mean`} \PY{o}{\PYZti{}} \PY{n}{`Dose`}\PY{p}{,} \PY{n}{data}\PY{o}{=}\PY{n}{grouped\PYZus{}means\PYZus{}sds}\PY{p}{)}
\PY{n+nf}{boxplot}\PY{p}{(}\PY{n}{`sd`} \PY{o}{\PYZti{}} \PY{n}{`Dose`}\PY{p}{,} \PY{n}{data}\PY{o}{=}\PY{n}{grouped\PYZus{}means\PYZus{}sds}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    \begin{center}
    \adjustimage{max size={0.6\linewidth}{0.6\paperheight}}{ex_3_6/output_11_0.png}
    \end{center}
    %{ \hspace*{\fill} \\}
    
    从比较结果来看，高剂量组标准差明显异于其他两组，认为误差等方差的假定不太合理，
所以不宜直接进行方差分析。

    \hypertarget{ex-3-6-2}{%
\subsection{(2)}\label{ex-3-6-2}}

\begin{figure}
\centering
\includegraphics{ex_3_6/008i3skNly1gr7gc6aor7j61mu04iq4i02.jpg}
\caption{题（2）}
\end{figure}

    自然对数变换，把变换后的数据列叫做 \texttt{lnRetention}：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{7}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{lnRetention} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{log}\PY{p}{(}\PY{n}{Retention}\PY{p}{)}
\PY{n}{data} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{cbind}\PY{p}{(}\PY{n}{data}\PY{p}{,} \PY{n}{lnRetention}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    计算变换后的分组均值、标准差：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{8}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{grouped\PYZus{}ln\PYZus{}means} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{aggregate}\PY{p}{(}\PY{n}{lnRetention}\PY{p}{,} \PY{n}{by}\PY{o}{=}\PY{n+nf}{list}\PY{p}{(}\PY{n}{FeIon}\PY{p}{,} \PY{n}{Dose}\PY{p}{)}\PY{p}{,} \PY{n}{FUN}\PY{o}{=}\PY{n}{mean}\PY{p}{)}
\PY{n}{grouped\PYZus{}ln\PYZus{}sds}   \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{aggregate}\PY{p}{(}\PY{n}{lnRetention}\PY{p}{,} \PY{n}{by}\PY{o}{=}\PY{n+nf}{list}\PY{p}{(}\PY{n}{FeIon}\PY{p}{,} \PY{n}{Dose}\PY{p}{)}\PY{p}{,} \PY{n}{FUN}\PY{o}{=}\PY{n}{sd}\PY{p}{)}
\PY{c+c1}{\PYZsh{} 下面省略和前面类似的代码，将结果整合到一个表格，方便查看：}
...
\end{Verbatim}
\end{tcolorbox}

    A data.frame: 6 × 4
\begin{tabular}{llll}
 FeIon & Dose & mean & sd\\
 <chr> & <chr> & <dbl> & <dbl>\\
\hline
	 Fe2 & high & 1.680129 & 0.4645464\\
	 Fe3 & high & 1.160924 & 0.5854773\\
	 Fe2 & low  & 2.403389 & 0.5693701\\
	 Fe3 & low  & 2.279981 & 0.6563113\\
	 Fe2 & mid  & 2.090045 & 0.5736511\\
	 Fe3 & mid  & 1.901225 & 0.6585116\\
\end{tabular}


    
    作图比较（代码和前面类似，省略了）：

    \begin{center}
    \adjustimage{max size={0.6\linewidth}{0.6\paperheight}}{ex_3_6/output_19_0.png}
    \end{center}
    % { \hspace*{\fill} \\}
    
    可以看到，现在各组标准差趋于一致，各组间标准差差异不大。
可以利用变换之后的数据进行方差分析了。

    \hypertarget{ex-3-6-3}{%
\subsection{(3)}\label{ex-3-6-3}}

\begin{figure}
\centering
\includegraphics{ex_3_6/008i3skNly1gr7grannygj31le050q51.jpg}
\caption{题（3）}
\end{figure}

用变换后的数据，重新做 aov：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{10}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{n}{lnRetention.aov} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{aov}\PY{p}{(}\PY{n}{lnRetention} \PY{o}{\PYZti{}} \PY{n}{FeIon} \PY{o}{+} \PY{n}{Dose} \PY{o}{+} \PY{n}{FeIon}\PY{o}{:}\PY{n}{Dose}\PY{p}{,} \PY{n}{data}\PY{o}{=}\PY{n}{data}\PY{p}{)}
\PY{n+nf}{summary}\PY{p}{(}\PY{n}{lnRetention.aov}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]
             Df Sum Sq Mean Sq F value   Pr(>F)    
FeIon         1   2.07   2.074   5.993   0.0161 *  
Dose          2  15.59   7.794  22.524 7.91e-09 ***
FeIon:Dose    2   0.81   0.405   1.171   0.3143    
Residuals   102  35.30   0.346                     
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
    \end{Verbatim}

    
    从结果中可以看出，在显著水平 \(\alpha=0.05\) 下，
铁离子种类因素（\(\textrm{Fe}^{2+}\)、\(\textrm{Fe}^{3+}\)）和
剂量因素（剂量低、中、高）对存留量的影响均显著（检验 p 值都小于
\(0.05\)）。
即说明两种铁离子存留量是有显著差异的，不同剂量水平下存留量也是有显著差异的。

同时，可以看到在该水平下，交互作用
（\texttt{FeIon:Dose} 项）对存留量影响不显著（\(p=0.3143>0.05\)），即两种铁离子存留量的差异在不同剂量水平下可认为是相同的。

    \hypertarget{ex-3-6-4}{%
\subsection{(4)}\label{ex-3-6-4}}

\begin{figure}
\centering
\includegraphics{ex_3_6/008i3skNly1gr7haazrvej31li050acr.jpg}
\caption{题（4）}
\end{figure}

    先求各因素在不同水平下的均值及其置信区间，可以复用习题 3.5 中封装的
\texttt{mean\_confin} 函数。

    按离子：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{12}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{p}{(}\PY{n+nf}{function}\PY{p}{(}\PY{p}{)} \PY{p}{\PYZob{}}
    \PY{n}{Fe2} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{mean\PYZus{}confin}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{n}{FeIon}\PY{o}{==}\PY{l+s}{\PYZdq{}}\PY{l+s}{Fe2\PYZdq{}}\PY{p}{,}\PY{p}{]}\PY{o}{\PYZdl{}}\PY{n}{lnRetention}\PY{p}{)}
    \PY{n}{Fe3} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{mean\PYZus{}confin}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{n}{FeIon}\PY{o}{==}\PY{l+s}{\PYZdq{}}\PY{l+s}{Fe3\PYZdq{}}\PY{p}{,}\PY{p}{]}\PY{o}{\PYZdl{}}\PY{n}{lnRetention}\PY{p}{)}
    \PY{n+nf}{rbind}\PY{p}{(}\PY{n}{Fe2}\PY{p}{,} \PY{n}{Fe3}\PY{p}{)}
\PY{p}{\PYZcb{}}\PY{p}{)}\PY{p}{(}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    A matrix: 2 × 3 of type dbl
\begin{tabular}{r|lll}
  & mean & conf.left & conf.right\\
\hline
	Fe2 & 2.057854 & 1.892251 & 2.223458\\
	Fe3 & 1.780710 & 1.568011 & 1.993409\\
\end{tabular}


    
    按剂量：

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{13}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{p}{(}\PY{n+nf}{function}\PY{p}{(}\PY{p}{)} \PY{p}{\PYZob{}}
    \PY{n}{Low}  \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{mean\PYZus{}confin}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{n}{Dose}\PY{o}{==}\PY{l+s}{\PYZdq{}}\PY{l+s}{low\PYZdq{}}\PY{p}{,}\PY{p}{]}\PY{o}{\PYZdl{}}\PY{n}{lnRetention}\PY{p}{)}
    \PY{n}{Mid}  \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{mean\PYZus{}confin}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{n}{Dose}\PY{o}{==}\PY{l+s}{\PYZdq{}}\PY{l+s}{mid\PYZdq{}}\PY{p}{,}\PY{p}{]}\PY{o}{\PYZdl{}}\PY{n}{lnRetention}\PY{p}{)}
    \PY{n}{High} \PY{o}{\PYZlt{}\PYZhy{}} \PY{n+nf}{mean\PYZus{}confin}\PY{p}{(}\PY{n}{data}\PY{p}{[}\PY{n}{Dose}\PY{o}{==}\PY{l+s}{\PYZdq{}}\PY{l+s}{high\PYZdq{}}\PY{p}{,}\PY{p}{]}\PY{o}{\PYZdl{}}\PY{n}{lnRetention}\PY{p}{)}
    \PY{n+nf}{rbind}\PY{p}{(}\PY{n}{Low}\PY{p}{,} \PY{n}{Mid}\PY{p}{,} \PY{n}{High}\PY{p}{)}
\PY{p}{\PYZcb{}}\PY{p}{)}\PY{p}{(}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    A matrix: 3 × 3 of type dbl
\begin{tabular}{r|lll}
  & mean & conf.left & conf.right\\
\hline
	Low & 2.341685 & 2.135709 & 2.547661\\
	Mid & 1.995635 & 1.787163 & 2.204107\\
	High & 1.420526 & 1.223052 & 1.618001\\
\end{tabular}


    
    利用 DescTools 包，求 Bonferroni 同时置信区间：\cite{ref7}

    \begin{tcolorbox}[breakable, size=fbox, boxrule=1pt, pad at break*=1mm,colback=cellbackground, colframe=cellborder]
\prompt{In}{incolor}{14}{\boxspacing}
\begin{Verbatim}[commandchars=\\\{\}]
\PY{c+c1}{\PYZsh{} install.packages(\PYZdq{}DescTools\PYZdq{})}
\PY{n+nf}{library}\PY{p}{(}\PY{n}{DescTools}\PY{p}{)}
\PY{n+nf}{PostHocTest}\PY{p}{(}\PY{n}{lnRetention.aov}\PY{p}{,} \PY{n}{method} \PY{o}{=} \PY{l+s}{\PYZdq{}}\PY{l+s}{bonferroni\PYZdq{}}\PY{p}{)}
\end{Verbatim}
\end{tcolorbox}

    
    \begin{Verbatim}[commandchars=\\\{\}]

  Posthoc multiple comparisons of means : Bonferroni 
    95\% family-wise confidence level

\$FeIon
              diff     lwr.ci      upr.ci   pval    
Fe3-Fe2 -0.2771441 -0.5016931 -0.05259515 0.0161 *  

\$Dose
               diff     lwr.ci       upr.ci    pval    
low-high  0.9211588  0.5836659  1.258651627 4.6e-09 ***
mid-high  0.5751084  0.2376156  0.912601307 0.00021 ***
mid-low  -0.3460503 -0.6835432 -0.008557451 0.04251 *  

\$`FeIon:Dose`
                        diff      lwr.ci     upr.ci    pval    
Fe3:high-Fe2:high -0.5192055 -1.10861287 0.07020194  0.1408    
Fe2:low-Fe2:high   0.7232596  0.13385220 1.31266701  0.0055 ** 
Fe3:low-Fe2:high   0.5998524  0.01044505 1.18925985  0.0425 *  
Fe2:mid-Fe2:high   0.4099156 -0.17949182 0.99932299  0.5859    
Fe3:mid-Fe2:high   0.2210958 -0.36831157 0.81050323  1.0000    
Fe2:low-Fe3:high   1.2424651  0.65305767 1.83187247 9.7e-08 ***
Fe3:low-Fe3:high   1.1190579  0.52965051 1.70846531 1.7e-06 ***
Fe2:mid-Fe3:high   0.9291210  0.33971365 1.51852845  0.0001 ***
Fe3:mid-Fe3:high   0.7403013  0.15089389 1.32970869  0.0040 ** 
Fe3:low-Fe2:low   -0.1234072 -0.71281456 0.46600024  1.0000    
Fe2:mid-Fe2:low   -0.3133440 -0.90275142 0.27606338  1.0000    
Fe3:mid-Fe2:low   -0.5021638 -1.09157118 0.08724362  0.1785    
Fe2:mid-Fe3:low   -0.1899369 -0.77934426 0.39947054  1.0000    
Fe3:mid-Fe3:low   -0.3787566 -0.96816402 0.21065078  0.8427    
Fe3:mid-Fe2:mid   -0.1888198 -0.77822716 0.40058764  1.0000    

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

    \end{Verbatim}



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% - MARK: ref
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%    

\begin{thebibliography}{10}

\bibitem{ref1}萌弟. R语言实战之回归分析[EB/OL]. (2020-08-15)[2021-05-30]. https://zhuanlan.zhihu.com/p/184923047
\bibitem{ref2}fitzgerald0. R中的Box-Cox变换[EB/OL]. (2017-07-16)[2021-06-01]. https://blog.csdn.net/fitzgerald0/article/details/75212215
\bibitem{ref3}吴健. 基于R语言进行Box-Cox变换[EB/OL]. (2018-11-19)[2021-06-01]. https://ask.hellobi.com/blog/R\_shequ/18371
\bibitem{ref4}jinzhao. 如何数据正态化Box-Cox变换[EB/OL]. (2018-07-19)[2021-06-02]. https://zhuanlan.zhihu.com/p/40125782
\bibitem{ref5}数据小兵. 用R语言做单因素方差分析及多重比较[EB/OL]. (2019-07-05)[2021-06-03]. http://www.datasoldier.net/archives/1315
\bibitem{ref6}刘小芬. R语言计算置信区间[EB/OL]. (2018-04-15)[2021-06-04]. https://zhuanlan.zhihu.com/p/35713329
\bibitem{ref7}Maurits Evers. Bonferroni Simultaneous Confidence Intervals of differences in means [EB/OL]. (2021-02-18)[2021-06-04]. https://stackoverflow.com/questions/48572619/bonferroni-simultaneous-confidence-intervals-of-differences-in-means
\bibitem{ref8}psych. psych: Procedures for Psychological, Psychometric, and Personality Research[EB/OL]. (2021-03-27)[2021-05-31]. https://cran.r-project.org/web/packages/psych/index.html
\bibitem{ref9}梅长林, 范金城. 数据分析方法[M]. 高等教育出版社, 2006.
\bibitem{ref10}R-project. An Introduction to R [M/OL]. https://cran.r-project.org/doc/manuals/r-release/R-intro.html
\end{thebibliography}
    
\end{document}
